Unverified commit 50ad760c, authored by Chen Weihang, committed by GitHub

remove experimental namespace of Tensor (#51155)

Parent 771b589d
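The change is mechanical: every call site spelled `paddle::experimental::Tensor` is rewritten as `paddle::Tensor`. Below is a minimal, hypothetical C++ sketch of that renaming pattern; the backward-compatibility alias in the `experimental` namespace is an assumption for illustration, not a line taken from this diff.

// Sketch only (not part of this commit): the Tensor class is assumed to live
// directly in namespace paddle, with the old experimental spelling kept as an
// alias so existing code keeps compiling.
namespace paddle {
class Tensor { /* ... */ };
namespace experimental {
using Tensor = paddle::Tensor;  // assumed compatibility alias
}  // namespace experimental
}  // namespace paddle
//
// Call sites touched by the hunks below change from
//   paddle::experimental::Tensor t;
// to
//   paddle::Tensor t;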
@@ -33,10 +33,9 @@
 namespace paddle {
 namespace distributed {
-using Tensor = paddle::experimental::Tensor;
-using Scalar = paddle::experimental::ScalarBase<paddle::experimental::Tensor>;
-using IntArray =
-    paddle::experimental::IntArrayBase<paddle::experimental::Tensor>;
+using Tensor = paddle::Tensor;
+using Scalar = paddle::experimental::ScalarBase<paddle::Tensor>;
+using IntArray = paddle::experimental::IntArrayBase<paddle::Tensor>;
 using Backend = paddle::experimental::Backend;
 std::vector<std::vector<size_t>> Eager_AssignGroupBySize(
...
@@ -28,8 +28,8 @@
 namespace egr {
-static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
-                            const paddle::experimental::Tensor& t,
+static void CopyOrAddTensor(paddle::Tensor* tensor,
+                            const paddle::Tensor& t,
                             bool is_fake_empty) {
   if (is_fake_empty) {
     VLOG(3) << "Move Tensor ptr: " << t.impl();
@@ -48,15 +48,14 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
       if (t.is_custom_device()) {
         *tensor = add_ad_func(t, *tensor);
       } else {
-        paddle::imperative::TensorAdd<paddle::experimental::Tensor>(t,
-                                                                    tensor);
+        paddle::imperative::TensorAdd<paddle::Tensor>(t, tensor);
       }
     } else {
       // TODO(jiabin): Support Other TensorBase later
       // TODO(zhanlve): Replace SelectedRowsAddTensor with
       // add_dygraph_function once it's supported
-      paddle::experimental::Tensor new_buffer(
-          std::make_shared<phi::DenseTensor>(), "tmp_accumulator");
+      paddle::Tensor new_buffer(std::make_shared<phi::DenseTensor>(),
+                                "tmp_accumulator");
       paddle::imperative::SelectedRowsAddTensor(*tensor, t, &new_buffer);
       tensor->set_impl(new_buffer.impl());
     }
@@ -65,19 +64,17 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
       if (LIKELY(tensor->is_sparse_coo_tensor())) {
         auto t_sparse =
             std::dynamic_pointer_cast<phi::SparseCooTensor>(t.impl());
-        paddle::experimental::Tensor t_values(
-            std::make_shared<phi::DenseTensor>(
-                t_sparse->non_zero_elements()));
+        paddle::Tensor t_values(std::make_shared<phi::DenseTensor>(
+            t_sparse->non_zero_elements()));
         auto tensor_sparse =
             std::dynamic_pointer_cast<phi::SparseCooTensor>(tensor->impl());
-        paddle::experimental::Tensor tensor_values(
-            std::make_shared<phi::DenseTensor>(
-                tensor_sparse->non_zero_elements()));
+        paddle::Tensor tensor_values(std::make_shared<phi::DenseTensor>(
+            tensor_sparse->non_zero_elements()));
         if (t.is_custom_device()) {
           tensor_values = add_ad_func(t_values, tensor_values);
         } else {
-          paddle::imperative::TensorAdd<paddle::experimental::Tensor>(
-              t_values, &tensor_values);
+          paddle::imperative::TensorAdd<paddle::Tensor>(t_values,
+                                                        &tensor_values);
         }
       }
     } else {
@@ -88,18 +85,18 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
         if (tensor->is_dense_tensor()) {
           paddle::imperative::SelectedRowsAddToTensor(t, tensor);
         } else {
-          *tensor = std::move(*paddle::imperative::SelectedRowsMerge<
-              paddle::experimental::Tensor>(t, *tensor));
+          *tensor =
+              std::move(*paddle::imperative::SelectedRowsMerge<paddle::Tensor>(
+                  t, *tensor));
         }
       }
     }
   }
 }
-paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                     kSlotSmallVectorSize>
+paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
 GradNodeAccumulation::operator()(
-    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+    paddle::small_vector<std::vector<paddle::Tensor>,
                          kSlotSmallVectorSize>& grads,  // NOLINT
     bool create_graph,
     bool is_new_grad) {
@@ -116,10 +113,9 @@ GradNodeAccumulation::operator()(
                         grads[0].size(),
                         0));
   // Apply Gradient Hooks
-  paddle::experimental::Tensor grad_out;
+  paddle::Tensor grad_out;
   if (GradientHooksRegistered()) {
-    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                         kSlotSmallVectorSize>
+    paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
         hooked_grads = ApplyGradientHooks(grads);
     grad_out = hooked_grads[0][0];
   } else {
...
@@ -37,9 +37,9 @@ class GradNodeAccumulation : public GradNodeBase {
   }
   // Functor: perform backward computations
-  virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+  virtual paddle::small_vector<std::vector<paddle::Tensor>,
                                kSlotSmallVectorSize>
-  operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+  operator()(paddle::small_vector<std::vector<paddle::Tensor>,
                                   kSlotSmallVectorSize>& grads,  // NOLINT
              bool create_graph = false,
             bool is_new_grad = false) override;
@@ -69,11 +69,9 @@ class GradNodeAccumulation : public GradNodeBase {
  private:
   // TODO(Jiabin): remove this when we make our clear gradient really cleared;
   bool is_fake_empty_ = {false};
-  std::weak_ptr<paddle::experimental::Tensor> weak_grad_;
+  std::weak_ptr<paddle::Tensor> weak_grad_;
   std::vector<std::shared_ptr<VoidHook>> reduce_hooks_;
-  std::function<paddle::experimental::Tensor(
-      const paddle::experimental::Tensor&)>
-      retain_grad_hook_;
+  std::function<paddle::Tensor(const paddle::Tensor&)> retain_grad_hook_;
 };
 }  // namespace egr
@@ -20,7 +20,7 @@
 namespace egr {
-static inline bool NeedCast(const paddle::experimental::Tensor& tensor,
+static inline bool NeedCast(const paddle::Tensor& tensor,
                             const paddle::experimental::DataType& dst_dtype) {
   auto place = tensor.place();
   auto data_type = tensor.dtype();
@@ -42,15 +42,15 @@ static inline bool NeedCast(const paddle::experimental::Tensor& tensor,
   return false;
 }
-inline std::vector<paddle::experimental::Tensor> AmpAutoCasts(
+inline std::vector<paddle::Tensor> AmpAutoCasts(
     const std::string& inputs_name,
-    const std::vector<paddle::experimental::Tensor>& inputs,
+    const std::vector<paddle::Tensor>& inputs,
     const paddle::experimental::DataType& dst_dtype,
     std::string op_name) {
   VLOG(6) << "AMP AmpAutoCasts:"
           << " inputs(" << inputs_name << ") dst_dtype("
           << phi::DataTypeToString(dst_dtype) << ").";
-  std::vector<paddle::experimental::Tensor> inputs_casted;
+  std::vector<paddle::Tensor> inputs_casted;
   for (auto& input : inputs) {
     if (NeedCast(input, dst_dtype)) {
       paddle::framework::AttributeMap cast_attrs = {
@@ -65,9 +65,9 @@ inline std::vector<paddle::experimental::Tensor> AmpAutoCasts(
   return inputs_casted;
 }
-inline paddle::experimental::Tensor AmpAutoCast(
+inline paddle::Tensor AmpAutoCast(
     const std::string& input_name,
-    const paddle::experimental::Tensor& input,
+    const paddle::Tensor& input,
     const paddle::experimental::DataType& dst_dtype,
     std::string op_name) {
   VLOG(6) << "AMP AmpAutoCasts:"
...
@@ -22,7 +22,7 @@ namespace egr {
 static inline paddle::experimental::DataType GetPromoteType(
     const std::string& op_name,
-    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+    const paddle::small_vector<std::vector<paddle::Tensor>,
                                kSlotSmallVectorSize>& amp_tensors_vector,
     const paddle::experimental::DataType& amp_dtype) {
   auto dst_type = amp_dtype;
@@ -87,7 +87,7 @@ static inline paddle::experimental::DataType GetPromoteType(
 inline paddle::experimental::DataType GetDtypeWithPlace(
     const std::string& op_name,
-    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+    const paddle::small_vector<std::vector<paddle::Tensor>,
                                kSlotSmallVectorSize>& amp_tensors_vector,
     const paddle::experimental::DataType amp_dtype) {
   if (amp_dtype == paddle::experimental::DataType::FLOAT32) {
@@ -120,7 +120,7 @@ inline paddle::experimental::DataType GetDtypeWithPlace(
 inline paddle::experimental::DataType GetAmpDestDtype(
     const std::string& op_name,
-    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+    const paddle::small_vector<std::vector<paddle::Tensor>,
                                kSlotSmallVectorSize>& amp_tensors_vector) {
   auto amp_dtype =
       egr::Controller::Instance().GetCurrentTracer()->GetAmpDtype();
...
@@ -93,11 +93,11 @@ static void ScaleDeviceDispatch(const phi::DenseTensor& dense_tensor,
   }
 }
-void ScaleAPI(const paddle::experimental::Tensor& x,
+void ScaleAPI(const paddle::Tensor& x,
               float scale,
               float bias,
               bool bias_after_scale,
-              paddle::experimental::Tensor* out) {
+              paddle::Tensor* out) {
   // TODO(jiabin): Support multiple tensor here, Create DenseTensor is not a
   // proper way to Demo it
   // Run Forward Function
@@ -161,16 +161,15 @@ void ScaleAPI(const paddle::experimental::Tensor& x,
 }
 void GradNodeScale::SetTensorWrappers_X(
-    const std::vector<paddle::experimental::Tensor>& tensors) {
+    const std::vector<paddle::Tensor>& tensors) {
   // Does nothing for scale
 }
 void GradNodeScale::SetAttributes_scale(float scale) { scale_ = scale; }
-paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                     kSlotSmallVectorSize>
+paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
 GradNodeScale::operator()(
-    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+    paddle::small_vector<std::vector<paddle::Tensor>,
                          kSlotSmallVectorSize>& grads,  // NOLINT
     bool create_graph,
     bool is_new_grad) {
@@ -183,17 +182,14 @@ GradNodeScale::operator()(
                         "However received: %d",
                         "This indicates an issue with Eager Dygraph Backward logic",
                         grads.size()));
-  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                       kSlotSmallVectorSize>
-      outs;
+  paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize> outs;
   // 2. Create needed out parttern
-  paddle::experimental::Tensor out;
+  paddle::Tensor out;
   // Apply Gradient Hooks
   if (GradientHooksRegistered()) {
     // TODO(jiabin): Shall we apply hook slot by slot here or accept
     // vector<vector<phi::tensor>> to apply all hooks?
-    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                         kSlotSmallVectorSize>
+    paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
         hooked_grads = ApplyGradientHooks(grads);
     ScaleAPI(/* slot by slot set */ hooked_grads[0][0],
              scale_,
...
@@ -27,11 +27,11 @@
  */
 namespace egr {
-void ScaleAPI(const paddle::experimental::Tensor& x,
+void ScaleAPI(const paddle::Tensor& x,
               float scale,
               float bias,
               bool bias_after_scale,
-              paddle::experimental::Tensor* out);
+              paddle::Tensor* out);
 class GradNodeScale : public GradNodeBase {
  public:
@@ -41,17 +41,16 @@ class GradNodeScale : public GradNodeBase {
   ~GradNodeScale() override = default;
   // Functor: perform backward computations
-  virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+  virtual paddle::small_vector<std::vector<paddle::Tensor>,
                                kSlotSmallVectorSize>
-  operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+  operator()(paddle::small_vector<std::vector<paddle::Tensor>,
                                   kSlotSmallVectorSize>& grads,  // NOLINT
              bool create_graph = false,
              bool is_new_grad = false) override;
   void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
-  void SetTensorWrappers_X(
-      const std::vector<paddle::experimental::Tensor>& tensors);
+  void SetTensorWrappers_X(const std::vector<paddle::Tensor>& tensors);
   void SetAttributes_scale(float scale);
   std::string name() override { return "scale node"; }
...
@@ -32,14 +32,14 @@
 namespace egr {
-paddle::experimental::Tensor scale(const paddle::experimental::Tensor& x,
-                                   float scale,
-                                   float bias,
-                                   bool bias_after_scale,
-                                   bool trace_backward) {
+paddle::Tensor scale(const paddle::Tensor& x,
+                     float scale,
+                     float bias,
+                     bool bias_after_scale,
+                     bool trace_backward) {
   // 1. Run Forward
   // 1.1 Create outputs
-  paddle::experimental::Tensor out;
+  paddle::Tensor out;
   // 1.2 Need by original op, we assemble ins, outs, attrs here
   // 1.3 Call forward C++ api
...
@@ -17,10 +17,10 @@
 #include "paddle/fluid/eager/eager_tensor.h"
 namespace egr {
-paddle::experimental::Tensor scale(const paddle::experimental::Tensor& x,
-                                   float scale,
-                                   float bias,
-                                   bool bias_after_scale,
-                                   bool trace_backward);
+paddle::Tensor scale(const paddle::Tensor& x,
+                     float scale,
+                     float bias,
+                     bool bias_after_scale,
+                     bool trace_backward);
 }  // namespace egr
@@ -16,15 +16,13 @@
 #include "paddle/phi/api/include/tensor.h"
-paddle::experimental::Tensor add_n_ad_func(
-    const std::vector<paddle::experimental::Tensor>& x);
+paddle::Tensor add_n_ad_func(const std::vector<paddle::Tensor>& x);
 
-paddle::experimental::Tensor conv2d_ad_func(
-    const paddle::experimental::Tensor& input,
-    const paddle::experimental::Tensor& filter,
-    std::vector<int> strides,
-    std::vector<int> paddings,
-    std::string padding_algorithm,
-    std::vector<int> dilations,
-    int groups,
-    std::string data_format);
+paddle::Tensor conv2d_ad_func(const paddle::Tensor& input,
+                              const paddle::Tensor& filter,
+                              std::vector<int> strides,
+                              std::vector<int> paddings,
+                              std::string padding_algorithm,
+                              std::vector<int> dilations,
+                              int groups,
+                              std::string data_format);
@@ -22,8 +22,7 @@
 DECLARE_bool(check_nan_inf);
-paddle::experimental::Tensor add_n_ad_func(
-    const std::vector<paddle::experimental::Tensor>& x) {
+paddle::Tensor add_n_ad_func(const std::vector<paddle::Tensor>& x) {
   // Dygraph Record Event
   paddle::platform::RecordEvent dygraph_entrance_record_event(
       "add_n dygraph", paddle::platform::TracerEventType::Operator, 1);
@@ -33,8 +32,7 @@ paddle::experimental::Tensor add_n_ad_func(
       paddle::imperative::AmpLevel::O0) {
     VLOG(5) << "Check and Prepare For AMP";
     auto op_name = phi::TransToFluidOpName("add_n");
-    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                         egr::kSlotSmallVectorSize>
+    paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
         amp_tensors_vector = {x};
     auto amp_dst_dtype = egr::GetAmpDestDtype(op_name, amp_tensors_vector);
...
@@ -23,15 +23,14 @@
 DECLARE_bool(check_nan_inf);
-paddle::experimental::Tensor conv2d_ad_func(
-    const paddle::experimental::Tensor& input,
-    const paddle::experimental::Tensor& filter,
-    std::vector<int> strides,
-    std::vector<int> paddings,
-    std::string padding_algorithm,
-    std::vector<int> dilations,
-    int groups,
-    std::string data_format) {
+paddle::Tensor conv2d_ad_func(const paddle::Tensor& input,
+                              const paddle::Tensor& filter,
+                              std::vector<int> strides,
+                              std::vector<int> paddings,
+                              std::string padding_algorithm,
+                              std::vector<int> dilations,
+                              int groups,
+                              std::string data_format) {
   // Dygraph Record Event
   paddle::platform::RecordEvent dygraph_entrance_record_event(
       "conv2d dygraph", paddle::platform::TracerEventType::Operator, 1);
@@ -41,8 +40,7 @@ paddle::experimental::Tensor conv2d_ad_func(
       paddle::imperative::AmpLevel::O0) {
     VLOG(5) << "Check and Prepare For AMP";
     auto op_name = phi::TransToFluidOpName("conv2d");
-    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                         egr::kSlotSmallVectorSize>
+    paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
         amp_tensors_vector = {{input}, {filter}};
     auto amp_dst_dtype = egr::GetAmpDestDtype(op_name, amp_tensors_vector);
@@ -71,8 +69,7 @@ paddle::experimental::Tensor conv2d_ad_func(
   if (egr::Controller::Instance().UseLayoutAutoTune()) {
     VLOG(5) << "Check and Prepare For LAYOUT";
-    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                         egr::kSlotSmallVectorSize>
+    paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
         tensors_vector = {{input}, {filter}};
     auto op_name = phi::TransToFluidOpName("conv2d");
...
@@ -24,10 +24,9 @@
 #include "paddle/phi/api/lib/api_custom_impl.h"
 DECLARE_bool(check_nan_inf);
-paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                     egr::kSlotSmallVectorSize>
+paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
 AddNGradNodeFinal::operator()(
-    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+    paddle::small_vector<std::vector<paddle::Tensor>,
                          egr::kSlotSmallVectorSize>& grads,
     bool create_graph,
     bool is_new_grad) {
@@ -42,15 +41,14 @@ AddNGradNodeFinal::operator()(
   // Prepare Grad function call
   const auto& out_metas = OutputMeta();
-  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                       egr::kSlotSmallVectorSize>
+  paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
       returns(1);
   for (int i = 0; i < 1; ++i) {
     out_metas[i].size() == 0 ? returns[i].resize(1)
                              : returns[i].resize(out_metas[i].size());
   }
-  std::vector<paddle::experimental::Tensor*> api_output_0;
+  std::vector<paddle::Tensor*> api_output_0;
   api_output_0.reserve(returns[0].size());
   for (size_t i = 0; i < returns[0].size(); ++i) {
     if (out_metas[0].empty() || out_metas[0][i].IsStopGradient()) {
...
@@ -28,10 +28,9 @@
 #include "paddle/phi/api/include/sparse_api.h"
 DECLARE_bool(check_nan_inf);
-paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                     egr::kSlotSmallVectorSize>
+paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
 Conv2dGradNodeFinal::operator()(
-    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+    paddle::small_vector<std::vector<paddle::Tensor>,
                          egr::kSlotSmallVectorSize>& grads,
     bool create_graph,
     bool is_new_grad) {
@@ -53,8 +52,7 @@ Conv2dGradNodeFinal::operator()(
   // Prepare Grad function call
   const auto& out_metas = OutputMeta();
-  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                       egr::kSlotSmallVectorSize>
+  paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
      returns(2);
   for (int i = 0; i < 2; ++i) {
     out_metas[i].size() == 0 ? returns[i].resize(1)
@@ -167,10 +165,9 @@ Conv2dGradNodeFinal::operator()(
   return returns;
 }
-paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                     egr::kSlotSmallVectorSize>
+paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
 Conv2dDoubleGradNodeFinal::operator()(
-    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+    paddle::small_vector<std::vector<paddle::Tensor>,
                          egr::kSlotSmallVectorSize>& grads,
     bool create_graph,
     bool is_new_grad) {
@@ -190,17 +187,17 @@ Conv2dDoubleGradNodeFinal::operator()(
   auto grad_out = egr::EagerUtils::RecoverTensorWrapper(&this->grad_out_);
   auto& grad_input_grad = hooked_grads[0][0];
-  paddle::optional<paddle::experimental::Tensor> grad_input_grad_optional;
+  paddle::optional<paddle::Tensor> grad_input_grad_optional;
   if (grad_input_grad.initialized())
     grad_input_grad_optional =
-        paddle::make_optional<paddle::experimental::Tensor>(grad_input_grad);
+        paddle::make_optional<paddle::Tensor>(grad_input_grad);
   auto& grad_filter_grad = hooked_grads[1][0];
-  paddle::optional<paddle::experimental::Tensor> grad_filter_grad_optional;
+  paddle::optional<paddle::Tensor> grad_filter_grad_optional;
   if (grad_filter_grad.initialized())
     grad_filter_grad_optional =
-        paddle::make_optional<paddle::experimental::Tensor>(grad_filter_grad);
+        paddle::make_optional<paddle::Tensor>(grad_filter_grad);
   auto& strides = this->strides_;
   auto& paddings = this->paddings_;
@@ -211,8 +208,7 @@ Conv2dDoubleGradNodeFinal::operator()(
   // Prepare Grad function call
   const auto& out_metas = OutputMeta();
-  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                       egr::kSlotSmallVectorSize>
+  paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
      returns(3);
   for (int i = 0; i < 3; ++i) {
     out_metas[i].size() == 0 ? returns[i].resize(1)
...
@@ -24,13 +24,12 @@ class Conv2dGradNodeFinal : public egr::GradNodeBase {
       : egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {}
   ~Conv2dGradNodeFinal() override = default;
-  virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+  virtual paddle::small_vector<std::vector<paddle::Tensor>,
                                egr::kSlotSmallVectorSize>
-  operator()(
-      paddle::small_vector<std::vector<paddle::experimental::Tensor>,  // NOLINT
-          egr::kSlotSmallVectorSize>& grads,  // NOLINT
-      bool create_graph = false,  // NOLINT
-      bool is_new_grad = false) override;  // NOLINT
+  operator()(paddle::small_vector<std::vector<paddle::Tensor>,  // NOLINT
+                 egr::kSlotSmallVectorSize>& grads,  // NOLINT
+             bool create_graph = false,  // NOLINT
+             bool is_new_grad = false) override;  // NOLINT
   std::string name() override { return "Conv2dGradNodeFinal"; }
   void ClearTensorWrappers() override {
@@ -49,10 +48,10 @@ class Conv2dGradNodeFinal : public egr::GradNodeBase {
   }
   // SetTensorWrapperX, SetTensorWrapperY, ...
-  void SetTensorWrapperinput(const paddle::experimental::Tensor& input) {
+  void SetTensorWrapperinput(const paddle::Tensor& input) {
     input_ = egr::TensorWrapper(input, false);
   }
-  void SetTensorWrapperfilter(const paddle::experimental::Tensor& filter) {
+  void SetTensorWrapperfilter(const paddle::Tensor& filter) {
     filter_ = egr::TensorWrapper(filter, false);
   }
@@ -95,13 +94,12 @@ class Conv2dDoubleGradNodeFinal : public egr::GradNodeBase {
       : egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {}
   ~Conv2dDoubleGradNodeFinal() override = default;
-  virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+  virtual paddle::small_vector<std::vector<paddle::Tensor>,
                                egr::kSlotSmallVectorSize>
-  operator()(
-      paddle::small_vector<std::vector<paddle::experimental::Tensor>,  // NOLINT
-          egr::kSlotSmallVectorSize>& grads,  // NOLINT
-      bool create_graph = false,  // NOLINT
-      bool is_new_grad = false) override;  // NOLINT
+  operator()(paddle::small_vector<std::vector<paddle::Tensor>,  // NOLINT
+                 egr::kSlotSmallVectorSize>& grads,  // NOLINT
+             bool create_graph = false,  // NOLINT
+             bool is_new_grad = false) override;  // NOLINT
   std::string name() override { return "Conv2dDoubleGradNodeFinal"; }
   void ClearTensorWrappers() override {
@@ -119,13 +117,13 @@ class Conv2dDoubleGradNodeFinal : public egr::GradNodeBase {
   }
   // SetTensorWrapperX, SetTensorWrapperY, ...
-  void SetTensorWrapperinput(const paddle::experimental::Tensor& input) {
+  void SetTensorWrapperinput(const paddle::Tensor& input) {
     input_ = egr::TensorWrapper(input, false);
   }
-  void SetTensorWrapperfilter(const paddle::experimental::Tensor& filter) {
+  void SetTensorWrapperfilter(const paddle::Tensor& filter) {
     filter_ = egr::TensorWrapper(filter, false);
   }
-  void SetTensorWrappergrad_out(const paddle::experimental::Tensor& grad_out) {
+  void SetTensorWrappergrad_out(const paddle::Tensor& grad_out) {
     grad_out_ = egr::TensorWrapper(grad_out, false);
   }
@@ -169,13 +167,12 @@ class AddNGradNodeFinal : public egr::GradNodeBase {
       : egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {}
   ~AddNGradNodeFinal() override = default;
-  virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+  virtual paddle::small_vector<std::vector<paddle::Tensor>,
                                egr::kSlotSmallVectorSize>
-  operator()(
-      paddle::small_vector<std::vector<paddle::experimental::Tensor>,  // NOLINT
-          egr::kSlotSmallVectorSize>& grads,  // NOLINT
-      bool create_graph = false,
-      bool is_new_grad = false) override;
+  operator()(paddle::small_vector<std::vector<paddle::Tensor>,  // NOLINT
+                 egr::kSlotSmallVectorSize>& grads,  // NOLINT
+             bool create_graph = false,
+             bool is_new_grad = false) override;
   std::string name() override { return "AddNGradNodeFinal"; }
   void ClearTensorWrappers() override {
@@ -193,7 +190,7 @@ class AddNGradNodeFinal : public egr::GradNodeBase {
   }
   // SetTensorWrapperX, SetTensorWrapperY, ...
-  void SetTensorWrapperx(const std::vector<paddle::experimental::Tensor>& x) {
+  void SetTensorWrapperx(const std::vector<paddle::Tensor>& x) {
    for (const auto& eager_tensor : x) {
      x_.emplace_back(egr::TensorWrapper(eager_tensor, true));
    }
...
@@ -20,103 +20,103 @@
 #include "paddle/fluid/imperative/tracer.h"
 #include "paddle/phi/api/all.h"
-std::tuple<paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor>
+std::tuple<paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor>
 fused_gate_attention_dygraph_function(
-    const paddle::experimental::Tensor& Query,
-    const paddle::experimental::Tensor& Key,
-    const paddle::experimental::Tensor& QueryWeight,
-    const paddle::experimental::Tensor& KeyWeight,
-    const paddle::experimental::Tensor& ValueWeight,
-    const paddle::experimental::Tensor& QKVWeight,
-    const paddle::experimental::Tensor& NonbatchedBias,
-    const paddle::experimental::Tensor& SrcMask,
-    const paddle::experimental::Tensor& GateWeight,
-    const paddle::experimental::Tensor& GateBias,
-    const paddle::experimental::Tensor& OutLinearWeight,
-    const paddle::experimental::Tensor& OutLinearBias,
+    const paddle::Tensor& Query,
+    const paddle::Tensor& Key,
+    const paddle::Tensor& QueryWeight,
+    const paddle::Tensor& KeyWeight,
+    const paddle::Tensor& ValueWeight,
+    const paddle::Tensor& QKVWeight,
+    const paddle::Tensor& NonbatchedBias,
+    const paddle::Tensor& SrcMask,
+    const paddle::Tensor& GateWeight,
+    const paddle::Tensor& GateBias,
+    const paddle::Tensor& OutLinearWeight,
+    const paddle::Tensor& OutLinearBias,
     const paddle::framework::AttributeMap& attr_map);
-std::tuple<paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor>
+std::tuple<paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor>
 fused_feedforward_dygraph_function(
-    const paddle::experimental::Tensor& X,
-    const paddle::experimental::Tensor& Dropout1Seed,
-    const paddle::experimental::Tensor& Dropout2Seed,
-    const paddle::experimental::Tensor& Linear1Weight,
-    const paddle::experimental::Tensor& Linear1Bias,
-    const paddle::experimental::Tensor& Linear2Weight,
-    const paddle::experimental::Tensor& Linear2Bias,
-    const paddle::experimental::Tensor& Ln1Scale,
-    const paddle::experimental::Tensor& Ln1Bias,
-    const paddle::experimental::Tensor& Ln2Scale,
-    const paddle::experimental::Tensor& Ln2Bias,
+    const paddle::Tensor& X,
+    const paddle::Tensor& Dropout1Seed,
+    const paddle::Tensor& Dropout2Seed,
+    const paddle::Tensor& Linear1Weight,
+    const paddle::Tensor& Linear1Bias,
+    const paddle::Tensor& Linear2Weight,
+    const paddle::Tensor& Linear2Bias,
+    const paddle::Tensor& Ln1Scale,
+    const paddle::Tensor& Ln1Bias,
+    const paddle::Tensor& Ln2Scale,
+    const paddle::Tensor& Ln2Bias,
    const paddle::framework::AttributeMap& attr_map);
-std::tuple<paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor>
+std::tuple<paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor>
 fused_attention_dygraph_function(
-    const paddle::experimental::Tensor& X,
-    const paddle::experimental::Tensor& LnScale,
-    const paddle::experimental::Tensor& LnBias,
-    const paddle::experimental::Tensor& QKVW,
-    const paddle::experimental::Tensor& QKVBias,
-    const paddle::experimental::Tensor& CacheKV,
-    const paddle::experimental::Tensor& SrcMask,
-    const paddle::experimental::Tensor& OutLinearW,
-    const paddle::experimental::Tensor& OutLinearBias,
-    const paddle::experimental::Tensor& Ln2Scale,
-    const paddle::experimental::Tensor& Ln2Bias,
+    const paddle::Tensor& X,
+    const paddle::Tensor& LnScale,
+    const paddle::Tensor& LnBias,
+    const paddle::Tensor& QKVW,
+    const paddle::Tensor& QKVBias,
+    const paddle::Tensor& CacheKV,
+    const paddle::Tensor& SrcMask,
+    const paddle::Tensor& OutLinearW,
+    const paddle::Tensor& OutLinearBias,
+    const paddle::Tensor& Ln2Scale,
+    const paddle::Tensor& Ln2Bias,
    const paddle::framework::AttributeMap& attr_map);
-paddle::experimental::Tensor fused_gemm_epilogue_dygraph_function(
-    const paddle::experimental::Tensor& X,
-    const paddle::experimental::Tensor& Y,
-    const paddle::experimental::Tensor& Bias,
+paddle::Tensor fused_gemm_epilogue_dygraph_function(
+    const paddle::Tensor& X,
+    const paddle::Tensor& Y,
+    const paddle::Tensor& Bias,
    const paddle::framework::AttributeMap& attr_map);
-std::tuple<paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor>
+std::tuple<paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor>
 fused_bias_dropout_residual_layer_norm_dygraph_function(
-    const paddle::experimental::Tensor& X,
-    const paddle::experimental::Tensor& Residual,
-    const paddle::experimental::Tensor& Bias,
-    const paddle::experimental::Tensor& LnScale,
-    const paddle::experimental::Tensor& LnBias,
+    const paddle::Tensor& X,
+    const paddle::Tensor& Residual,
+    const paddle::Tensor& Bias,
+    const paddle::Tensor& LnScale,
+    const paddle::Tensor& LnBias,
    const paddle::framework::AttributeMap& attr_map);
@@ -20,38 +20,38 @@
 #include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
-std::tuple<paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor>
+std::tuple<paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor>
 fused_attention_dygraph_function(
-    const paddle::experimental::Tensor& X,
-    const paddle::experimental::Tensor& LnScale,
-    const paddle::experimental::Tensor& LnBias,
-    const paddle::experimental::Tensor& QKVW,
-    const paddle::experimental::Tensor& QKVBias,
-    const paddle::experimental::Tensor& CacheKV,
-    const paddle::experimental::Tensor& SrcMask,
-    const paddle::experimental::Tensor& OutLinearW,
-    const paddle::experimental::Tensor& OutLinearBias,
-    const paddle::experimental::Tensor& Ln2Scale,
-    const paddle::experimental::Tensor& Ln2Bias,
+    const paddle::Tensor& X,
+    const paddle::Tensor& LnScale,
+    const paddle::Tensor& LnBias,
+    const paddle::Tensor& QKVW,
+    const paddle::Tensor& QKVBias,
+    const paddle::Tensor& CacheKV,
+    const paddle::Tensor& SrcMask,
+    const paddle::Tensor& OutLinearW,
+    const paddle::Tensor& OutLinearBias,
+    const paddle::Tensor& Ln2Scale,
+    const paddle::Tensor& Ln2Bias,
     const paddle::framework::AttributeMap& attr_map) {
   paddle::platform::RecordEvent dygraph_entrance_record_event(
       "fused_attention dygraph",
@@ -64,8 +64,7 @@ fused_attention_dygraph_function(
       paddle::imperative::AmpLevel::O0) {
     VLOG(5) << "Check and Prepare For AMP";
-    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                         egr::kSlotSmallVectorSize>
+    paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
         amp_tensors_vector = {{X}, {QKVW}, {OutLinearW}};
     if (LnScale.initialized()) amp_tensors_vector.push_back({LnScale});
     if (LnBias.initialized()) amp_tensors_vector.push_back({LnBias});
@@ -280,47 +279,47 @@ fused_attention_dygraph_function(
       true,
       {});
-  paddle::experimental::Tensor LnMean;
+  paddle::Tensor LnMean;
   egr::EagerUtils::GetOutput(outs["LnMean"][0], &LnMean);
-  paddle::experimental::Tensor LnVariance;
+  paddle::Tensor LnVariance;
   egr::EagerUtils::GetOutput(outs["LnVariance"][0], &LnVariance);
-  paddle::experimental::Tensor LnOut;
+  paddle::Tensor LnOut;
   egr::EagerUtils::GetOutput(outs["LnOut"][0], &LnOut);
-  paddle::experimental::Tensor QKVOut;
+  paddle::Tensor QKVOut;
   egr::EagerUtils::GetOutput(outs["QKVOut"][0], &QKVOut);
-  paddle::experimental::Tensor QKVBiasOut;
+  paddle::Tensor QKVBiasOut;
   egr::EagerUtils::GetOutput(outs["QKVBiasOut"][0], &QKVBiasOut);
-  paddle::experimental::Tensor TransposeOut2;
+  paddle::Tensor TransposeOut2;
   egr::EagerUtils::GetOutput(outs["TransposeOut2"][0], &TransposeOut2);
-  paddle::experimental::Tensor QKOut;
+  paddle::Tensor QKOut;
   egr::EagerUtils::GetOutput(outs["QKOut"][0], &QKOut);
-  paddle::experimental::Tensor QKTVOut;
+  paddle::Tensor QKTVOut;
   egr::EagerUtils::GetOutput(outs["QKTVOut"][0], &QKTVOut);
-  paddle::experimental::Tensor SoftmaxOut;
+  paddle::Tensor SoftmaxOut;
   egr::EagerUtils::GetOutput(outs["SoftmaxOut"][0], &SoftmaxOut);
-  paddle::experimental::Tensor AttnDropoutMaskOut;
+  paddle::Tensor AttnDropoutMaskOut;
   egr::EagerUtils::GetOutput(outs["AttnDropoutMaskOut"][0],
                              &AttnDropoutMaskOut);
-  paddle::experimental::Tensor AttnDropoutOut;
+  paddle::Tensor AttnDropoutOut;
   egr::EagerUtils::GetOutput(outs["AttnDropoutOut"][0], &AttnDropoutOut);
-  paddle::experimental::Tensor SrcMaskOut;
+  paddle::Tensor SrcMaskOut;
   egr::EagerUtils::GetOutput(outs["SrcMaskOut"][0], &SrcMaskOut);
-  paddle::experimental::Tensor FMHAOut;
+  paddle::Tensor FMHAOut;
   egr::EagerUtils::GetOutput(outs["FMHAOut"][0], &FMHAOut);
-  paddle::experimental::Tensor OutLinearOut;
+  paddle::Tensor OutLinearOut;
   egr::EagerUtils::GetOutput(outs["OutLinearOut"][0], &OutLinearOut);
-  paddle::experimental::Tensor DropoutMaskOut;
+  paddle::Tensor DropoutMaskOut;
   egr::EagerUtils::GetOutput(outs["DropoutMaskOut"][0], &DropoutMaskOut);
-  paddle::experimental::Tensor Ln2Mean;
+  paddle::Tensor Ln2Mean;
   egr::EagerUtils::GetOutput(outs["Ln2Mean"][0], &Ln2Mean);
-  paddle::experimental::Tensor Ln2Variance;
+  paddle::Tensor Ln2Variance;
   egr::EagerUtils::GetOutput(outs["Ln2Variance"][0], &Ln2Variance);
-  paddle::experimental::Tensor BiasDropoutResidualOut;
+  paddle::Tensor BiasDropoutResidualOut;
   egr::EagerUtils::GetOutput(outs["BiasDropoutResidualOut"][0],
                              &BiasDropoutResidualOut);
-  paddle::experimental::Tensor CacheKVOut;
+  paddle::Tensor CacheKVOut;
   egr::EagerUtils::GetOutput(outs["CacheKVOut"][0], &CacheKVOut);
-  paddle::experimental::Tensor Y;
+  paddle::Tensor Y;
   egr::EagerUtils::GetOutput(outs["Y"][0], &Y);
   {
...
@@ -20,17 +20,17 @@
 #include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
-std::tuple<paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor>
+std::tuple<paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor>
 fused_bias_dropout_residual_layer_norm_dygraph_function(
-    const paddle::experimental::Tensor& X,
-    const paddle::experimental::Tensor& Residual,
-    const paddle::experimental::Tensor& Bias,
-    const paddle::experimental::Tensor& LnScale,
-    const paddle::experimental::Tensor& LnBias,
+    const paddle::Tensor& X,
+    const paddle::Tensor& Residual,
+    const paddle::Tensor& Bias,
+    const paddle::Tensor& LnScale,
+    const paddle::Tensor& LnBias,
     const paddle::framework::AttributeMap& attr_map) {
   paddle::platform::RecordEvent dygraph_entrance_record_event(
       "fused_bias_dropout_residual_layer_norm dygraph",
@@ -43,8 +43,7 @@ fused_bias_dropout_residual_layer_norm_dygraph_function(
       paddle::imperative::AmpLevel::O0) {
     VLOG(5) << "Check and Prepare For AMP";
-    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                         egr::kSlotSmallVectorSize>
+    paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
        amp_tensors_vector = {{X}, {Residual}};
     if (Bias.initialized()) amp_tensors_vector.push_back({Bias});
     if (LnScale.initialized()) amp_tensors_vector.push_back({LnScale});
@@ -150,16 +149,16 @@ fused_bias_dropout_residual_layer_norm_dygraph_function(
       true,
       {});
-  paddle::experimental::Tensor BiasDropoutResidualOut;
+  paddle::Tensor BiasDropoutResidualOut;
   egr::EagerUtils::GetOutput(outs["BiasDropoutResidualOut"][0],
                              &BiasDropoutResidualOut);
-  paddle::experimental::Tensor DropoutMaskOut;
+  paddle::Tensor DropoutMaskOut;
   egr::EagerUtils::GetOutput(outs["DropoutMaskOut"][0], &DropoutMaskOut);
-  paddle::experimental::Tensor LnMean;
+  paddle::Tensor LnMean;
   egr::EagerUtils::GetOutput(outs["LnMean"][0], &LnMean);
-  paddle::experimental::Tensor LnVariance;
+  paddle::Tensor LnVariance;
   egr::EagerUtils::GetOutput(outs["LnVariance"][0], &LnVariance);
-  paddle::experimental::Tensor Y;
+  paddle::Tensor Y;
   egr::EagerUtils::GetOutput(outs["Y"][0], &Y);
   {
...
@@ -19,29 +19,29 @@
 #include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
-std::tuple<paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor>
+std::tuple<paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor>
 fused_feedforward_dygraph_function(
-    const paddle::experimental::Tensor& X,
-    const paddle::experimental::Tensor& Dropout1Seed,
-    const paddle::experimental::Tensor& Dropout2Seed,
-    const paddle::experimental::Tensor& Linear1Weight,
-    const paddle::experimental::Tensor& Linear1Bias,
-    const paddle::experimental::Tensor& Linear2Weight,
-    const paddle::experimental::Tensor& Linear2Bias,
-    const paddle::experimental::Tensor& Ln1Scale,
-    const paddle::experimental::Tensor& Ln1Bias,
-    const paddle::experimental::Tensor& Ln2Scale,
-    const paddle::experimental::Tensor& Ln2Bias,
+    const paddle::Tensor& X,
+    const paddle::Tensor& Dropout1Seed,
+    const paddle::Tensor& Dropout2Seed,
+    const paddle::Tensor& Linear1Weight,
+    const paddle::Tensor& Linear1Bias,
+    const paddle::Tensor& Linear2Weight,
+    const paddle::Tensor& Linear2Bias,
+    const paddle::Tensor& Ln1Scale,
+    const paddle::Tensor& Ln1Bias,
+    const paddle::Tensor& Ln2Scale,
+    const paddle::Tensor& Ln2Bias,
     const paddle::framework::AttributeMap& attr_map) {
   paddle::platform::RecordEvent dygraph_entrance_record_event(
       "fused_feedforward dygraph",
@@ -54,8 +54,7 @@ fused_feedforward_dygraph_function(
       paddle::imperative::AmpLevel::O0) {
     VLOG(5) << "Check and Prepare For AMP";
-    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                         egr::kSlotSmallVectorSize>
+    paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
        amp_tensors_vector = {{X}, {Linear1Weight}, {Linear2Weight}};
     if (Dropout1Seed.initialized())
       amp_tensors_vector.push_back({Dropout1Seed});
@@ -247,27 +246,27 @@ fused_feedforward_dygraph_function(
       true,
       {});
-  paddle::experimental::Tensor Out;
+  paddle::Tensor Out;
   egr::EagerUtils::GetOutput(outs["Out"][0], &Out);
-  paddle::experimental::Tensor Dropout1Mask;
+  paddle::Tensor Dropout1Mask;
   egr::EagerUtils::GetOutput(outs["Dropout1Mask"][0], &Dropout1Mask);
-  paddle::experimental::Tensor Dropout2Mask;
+  paddle::Tensor Dropout2Mask;
   egr::EagerUtils::GetOutput(outs["Dropout2Mask"][0], &Dropout2Mask);
-  paddle::experimental::Tensor Ln1Mean;
+  paddle::Tensor Ln1Mean;
   egr::EagerUtils::GetOutput(outs["Ln1Mean"][0], &Ln1Mean);
-  paddle::experimental::Tensor Ln1Variance;
+  paddle::Tensor Ln1Variance;
   egr::EagerUtils::GetOutput(outs["Ln1Variance"][0], &Ln1Variance);
-  paddle::experimental::Tensor Ln2Mean;
+  paddle::Tensor Ln2Mean;
   egr::EagerUtils::GetOutput(outs["Ln2Mean"][0], &Ln2Mean);
-  paddle::experimental::Tensor Ln2Variance;
+  paddle::Tensor Ln2Variance;
   egr::EagerUtils::GetOutput(outs["Ln2Variance"][0], &Ln2Variance);
-  paddle::experimental::Tensor Linear1Out;
+  paddle::Tensor Linear1Out;
   egr::EagerUtils::GetOutput(outs["Linear1Out"][0], &Linear1Out);
-  paddle::experimental::Tensor Ln1Out;
+  paddle::Tensor Ln1Out;
   egr::EagerUtils::GetOutput(outs["Ln1Out"][0], &Ln1Out);
-  paddle::experimental::Tensor Dropout1Out;
+  paddle::Tensor Dropout1Out;
   egr::EagerUtils::GetOutput(outs["Dropout1Out"][0], &Dropout1Out);
-  paddle::experimental::Tensor Dropout2Out;
+  paddle::Tensor Dropout2Out;
   egr::EagerUtils::GetOutput(outs["Dropout2Out"][0], &Dropout2Out);
   {
...
@@ -19,27 +19,27 @@
 #include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
-std::tuple<paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor,
-           paddle::experimental::Tensor>
+std::tuple<paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor,
+           paddle::Tensor>
 fused_gate_attention_dygraph_function(
-    const paddle::experimental::Tensor& Query,
-    const paddle::experimental::Tensor& Key,
-    const paddle::experimental::Tensor& QueryWeight,
-    const paddle::experimental::Tensor& KeyWeight,
-    const paddle::experimental::Tensor& ValueWeight,
-    const paddle::experimental::Tensor& QKVWeight,
-    const paddle::experimental::Tensor& NonbatchedBias,
-    const paddle::experimental::Tensor& SrcMask,
-    const paddle::experimental::Tensor& GateWeight,
-    const paddle::experimental::Tensor& GateBias,
-    const paddle::experimental::Tensor& OutLinearWeight,
-    const paddle::experimental::Tensor& OutLinearBias,
+    const paddle::Tensor& Query,
+    const paddle::Tensor& Key,
+    const paddle::Tensor& QueryWeight,
+    const paddle::Tensor& KeyWeight,
+    const paddle::Tensor& ValueWeight,
+    const paddle::Tensor& QKVWeight,
+    const paddle::Tensor& NonbatchedBias,
+    const paddle::Tensor& SrcMask,
+    const paddle::Tensor& GateWeight,
+    const paddle::Tensor& GateBias,
+    const paddle::Tensor& OutLinearWeight,
+    const paddle::Tensor& OutLinearBias,
     const paddle::framework::AttributeMap& attr_map) {
   paddle::platform::RecordEvent dygraph_entrance_record_event(
       "fused_gate_attention dygraph",
@@ -52,8 +52,7 @@ fused_gate_attention_dygraph_function(
       paddle::imperative::AmpLevel::O0) {
     VLOG(5) << "Check and Prepare For AMP";
-    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                         egr::kSlotSmallVectorSize>
+    paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
        amp_tensors_vector = {
            {Query}, {SrcMask}, {OutLinearWeight}, {OutLinearBias}};
     if (Key.initialized()) amp_tensors_vector.push_back({Key});
@@ -247,21 +246,21 @@ fused_gate_attention_dygraph_function(
       true,
       {});
-  paddle::experimental::Tensor QueryTransposeOut;
+  paddle::Tensor QueryTransposeOut;
   egr::EagerUtils::GetOutput(outs["QueryTransposeOut"][0], &QueryTransposeOut);
-  paddle::experimental::Tensor KeyTransposeOut;
+  paddle::Tensor KeyTransposeOut;
   egr::EagerUtils::GetOutput(outs["KeyTransposeOut"][0], &KeyTransposeOut);
-  paddle::experimental::Tensor ValueTransposeOut;
+  paddle::Tensor ValueTransposeOut;
   egr::EagerUtils::GetOutput(outs["ValueTransposeOut"][0], &ValueTransposeOut);
-  paddle::experimental::Tensor QKVTransposeOut;
+  paddle::Tensor QKVTransposeOut;
   egr::EagerUtils::GetOutput(outs["QKVTransposeOut"][0], &QKVTransposeOut);
-  paddle::experimental::Tensor SoftmaxOut;
+  paddle::Tensor SoftmaxOut;
   egr::EagerUtils::GetOutput(outs["SoftmaxOut"][0], &SoftmaxOut);
-  paddle::experimental::Tensor FMHAOut;
+  paddle::Tensor FMHAOut;
   egr::EagerUtils::GetOutput(outs["FMHAOut"][0], &FMHAOut);
-  paddle::experimental::Tensor GateOut;
+  paddle::Tensor GateOut;
   egr::EagerUtils::GetOutput(outs["GateOut"][0], &GateOut);
-  paddle::experimental::Tensor Out;
+  paddle::Tensor Out;
   egr::EagerUtils::GetOutput(outs["Out"][0], &Out);
   {
...
...@@ -20,10 +20,10 @@ ...@@ -20,10 +20,10 @@
#include "paddle/fluid/eager/api/utils/global_utils.h" #include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/platform/profiler/event_tracing.h" #include "paddle/fluid/platform/profiler/event_tracing.h"
paddle::experimental::Tensor fused_gemm_epilogue_dygraph_function( paddle::Tensor fused_gemm_epilogue_dygraph_function(
const paddle::experimental::Tensor& X, const paddle::Tensor& X,
const paddle::experimental::Tensor& Y, const paddle::Tensor& Y,
const paddle::experimental::Tensor& Bias, const paddle::Tensor& Bias,
const paddle::framework::AttributeMap& attr_map) { const paddle::framework::AttributeMap& attr_map) {
paddle::platform::RecordEvent dygraph_entrance_record_event( paddle::platform::RecordEvent dygraph_entrance_record_event(
"fused_gemm_epilogue dygraph", "fused_gemm_epilogue dygraph",
...@@ -36,8 +36,7 @@ paddle::experimental::Tensor fused_gemm_epilogue_dygraph_function( ...@@ -36,8 +36,7 @@ paddle::experimental::Tensor fused_gemm_epilogue_dygraph_function(
paddle::imperative::AmpLevel::O0) { paddle::imperative::AmpLevel::O0) {
VLOG(5) << "Check and Prepare For AMP"; VLOG(5) << "Check and Prepare For AMP";
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
amp_tensors_vector = {{X}, {Y}, {Bias}}; amp_tensors_vector = {{X}, {Y}, {Bias}};
auto amp_dst_dtype = auto amp_dst_dtype =
...@@ -90,7 +89,7 @@ paddle::experimental::Tensor fused_gemm_epilogue_dygraph_function( ...@@ -90,7 +89,7 @@ paddle::experimental::Tensor fused_gemm_epilogue_dygraph_function(
true, true,
{}); {});
paddle::experimental::Tensor Out; paddle::Tensor Out;
egr::EagerUtils::GetOutput(outs["Out"][0], &Out); egr::EagerUtils::GetOutput(outs["Out"][0], &Out);
{ {
......
...@@ -20,20 +20,17 @@ ...@@ -20,20 +20,17 @@
#include "paddle/fluid/imperative/tracer.h" #include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/api/all.h" #include "paddle/phi/api/all.h"
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
fused_attentionGradNodeCompat::operator()( fused_attentionGradNodeCompat::operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>,
egr::kSlotSmallVectorSize>& grads, egr::kSlotSmallVectorSize>& grads,
bool create_graph, bool create_graph,
bool is_new_grad) { bool is_new_grad) {
VLOG(3) << "Running Eager Backward Node: fused_attentionGradNodeCompat"; VLOG(3) << "Running Eager Backward Node: fused_attentionGradNodeCompat";
const auto& out_metas = OutputMeta(); const auto& out_metas = OutputMeta();
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
outputs(23); outputs(23);
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
hooked_grads0 = fused_attentionGradNodeCompat::ApplyGradientHooks(grads); hooked_grads0 = fused_attentionGradNodeCompat::ApplyGradientHooks(grads);
bool pre_layer_norm = false; bool pre_layer_norm = false;
......
...@@ -20,21 +20,18 @@ ...@@ -20,21 +20,18 @@
#include "paddle/fluid/imperative/tracer.h" #include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/api/all.h" #include "paddle/phi/api/all.h"
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
fused_bias_dropout_residual_layer_normGradNodeCompat::operator()( fused_bias_dropout_residual_layer_normGradNodeCompat::operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>,
egr::kSlotSmallVectorSize>& grads, egr::kSlotSmallVectorSize>& grads,
bool create_graph, bool create_graph,
bool is_new_grad) { bool is_new_grad) {
const auto& out_metas = OutputMeta(); const auto& out_metas = OutputMeta();
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
outputs(5); outputs(5);
VLOG(3) << "Running Eager Backward Node: " VLOG(3) << "Running Eager Backward Node: "
"fused_bias_dropout_residual_layer_normGradNodeCompat"; "fused_bias_dropout_residual_layer_normGradNodeCompat";
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
hooked_grads0 = fused_bias_dropout_residual_layer_normGradNodeCompat:: hooked_grads0 = fused_bias_dropout_residual_layer_normGradNodeCompat::
ApplyGradientHooks(grads); ApplyGradientHooks(grads);
std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> ins0 = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> ins0 =
......
...@@ -20,21 +20,18 @@ ...@@ -20,21 +20,18 @@
#include "paddle/fluid/imperative/tracer.h" #include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/api/all.h" #include "paddle/phi/api/all.h"
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
fused_feedforwardGradNodeCompat::operator()( fused_feedforwardGradNodeCompat::operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>,
egr::kSlotSmallVectorSize>& grads, egr::kSlotSmallVectorSize>& grads,
bool create_graph, bool create_graph,
bool is_new_grad) { bool is_new_grad) {
VLOG(3) << "Running Eager Backward Node: fused_feedforwardGradNodeCompat"; VLOG(3) << "Running Eager Backward Node: fused_feedforwardGradNodeCompat";
const auto& out_metas = OutputMeta(); const auto& out_metas = OutputMeta();
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
outputs(11); outputs(11);
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
hooked_grads0 = hooked_grads0 =
fused_feedforwardGradNodeCompat::ApplyGradientHooks(grads); fused_feedforwardGradNodeCompat::ApplyGradientHooks(grads);
......
...@@ -20,21 +20,18 @@ ...@@ -20,21 +20,18 @@
#include "paddle/fluid/imperative/tracer.h" #include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/api/all.h" #include "paddle/phi/api/all.h"
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
fused_gate_attentionGradNodeCompat::operator()( fused_gate_attentionGradNodeCompat::operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>,
egr::kSlotSmallVectorSize>& grads, egr::kSlotSmallVectorSize>& grads,
bool create_graph, bool create_graph,
bool is_new_grad) { bool is_new_grad) {
VLOG(3) << "Running Eager Backward Node: fused_gate_attentionGradNodeCompat"; VLOG(3) << "Running Eager Backward Node: fused_gate_attentionGradNodeCompat";
const auto& out_metas = OutputMeta(); const auto& out_metas = OutputMeta();
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
outputs(12); outputs(12);
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
hooked_grads0 = hooked_grads0 =
fused_gate_attentionGradNodeCompat::ApplyGradientHooks(grads); fused_gate_attentionGradNodeCompat::ApplyGradientHooks(grads);
......
...@@ -20,20 +20,17 @@ ...@@ -20,20 +20,17 @@
#include "paddle/fluid/imperative/tracer.h" #include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/api/all.h" #include "paddle/phi/api/all.h"
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
fused_gemm_epilogueGradNodeCompat::operator()( fused_gemm_epilogueGradNodeCompat::operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>,
egr::kSlotSmallVectorSize>& grads, egr::kSlotSmallVectorSize>& grads,
bool create_graph, bool create_graph,
bool is_new_grad) { bool is_new_grad) {
const auto& out_metas = OutputMeta(); const auto& out_metas = OutputMeta();
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
outputs(3); outputs(3);
VLOG(3) << "Running Eager Backward Node: fused_gemm_epilogueGradNodeCompat"; VLOG(3) << "Running Eager Backward Node: fused_gemm_epilogueGradNodeCompat";
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
hooked_grads0 = hooked_grads0 =
fused_gemm_epilogueGradNodeCompat::ApplyGradientHooks(grads); fused_gemm_epilogueGradNodeCompat::ApplyGradientHooks(grads);
std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> ins0 = std::map<std::string, std::vector<std::shared_ptr<egr::EagerVariable>>> ins0 =
......
...@@ -24,9 +24,8 @@ namespace egr { ...@@ -24,9 +24,8 @@ namespace egr {
namespace egr_utils_api { namespace egr_utils_api {
int64_t RegisterGradientHookForTensor( int64_t RegisterGradientHookForTensor(
const paddle::experimental::Tensor& tensor, const paddle::Tensor& tensor,
const std::function<paddle::experimental::Tensor( const std::function<paddle::Tensor(const paddle::Tensor&)>& hook) {
const paddle::experimental::Tensor&)>& hook) {
// Find grad_node and out_rank from AutogradMeta // Find grad_node and out_rank from AutogradMeta
std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(tensor); std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(tensor);
auto rank_info = EagerUtils::unsafe_autograd_meta(tensor)->OutRankInfo(); auto rank_info = EagerUtils::unsafe_autograd_meta(tensor)->OutRankInfo();
...@@ -37,7 +36,7 @@ int64_t RegisterGradientHookForTensor( ...@@ -37,7 +36,7 @@ int64_t RegisterGradientHookForTensor(
std::move(std::make_shared<CppTensorHook>(hook))); std::move(std::make_shared<CppTensorHook>(hook)));
} }
void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor, void RegisterReduceHookForTensor(const paddle::Tensor& tensor,
const std::function<void()>& hook) { const std::function<void()>& hook) {
if (IsLeafTensor(tensor)) { if (IsLeafTensor(tensor)) {
VLOG(6) << "Register ReduceHook for leaf tensor"; VLOG(6) << "Register ReduceHook for leaf tensor";
...@@ -57,7 +56,7 @@ void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor, ...@@ -57,7 +56,7 @@ void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor,
} }
} }
void RetainGradForTensor(const paddle::experimental::Tensor& tensor) { void RetainGradForTensor(const paddle::Tensor& tensor) {
if (IsLeafTensor(tensor)) { if (IsLeafTensor(tensor)) {
// Leaf tensor's grad will always be retained // Leaf tensor's grad will always be retained
// Refer to implementation of AccumulationNode for more details // Refer to implementation of AccumulationNode for more details
...@@ -70,11 +69,10 @@ void RetainGradForTensor(const paddle::experimental::Tensor& tensor) { ...@@ -70,11 +69,10 @@ void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
meta->SetRetainGrads(true); meta->SetRetainGrads(true);
} }
std::weak_ptr<paddle::experimental::Tensor> weak_grad_tensor = std::weak_ptr<paddle::Tensor> weak_grad_tensor = meta->WeakGrad();
meta->WeakGrad();
// Define Hook // Define Hook
auto hook = [weak_grad_tensor](const paddle::experimental::Tensor& t) { auto hook = [weak_grad_tensor](const paddle::Tensor& t) {
if (!weak_grad_tensor.expired()) { if (!weak_grad_tensor.expired()) {
auto grad_tensor = weak_grad_tensor.lock(); auto grad_tensor = weak_grad_tensor.lock();
if (t.defined()) { if (t.defined()) {
...@@ -84,12 +82,12 @@ void RetainGradForTensor(const paddle::experimental::Tensor& tensor) { ...@@ -84,12 +82,12 @@ void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
grad_tensor->set_autograd_meta(t.mutable_autograd_meta()); grad_tensor->set_autograd_meta(t.mutable_autograd_meta());
return *grad_tensor.get(); return *grad_tensor.get();
} else { } else {
VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook"; VLOG(7) << "Retain NULL paddle::Tensor in Grad Hook";
return paddle::experimental::Tensor(); return paddle::Tensor();
} }
} else { } else {
VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook"; VLOG(7) << "Retain NULL paddle::Tensor in Grad Hook";
return paddle::experimental::Tensor(); return paddle::Tensor();
} }
}; };
......
...@@ -22,13 +22,12 @@ namespace egr { ...@@ -22,13 +22,12 @@ namespace egr {
namespace egr_utils_api { namespace egr_utils_api {
int64_t RegisterGradientHookForTensor( int64_t RegisterGradientHookForTensor(
const paddle::experimental::Tensor& tensor, const paddle::Tensor& tensor,
const std::function<paddle::experimental::Tensor( const std::function<paddle::Tensor(const paddle::Tensor&)>& hook);
const paddle::experimental::Tensor&)>& hook);
void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor, void RegisterReduceHookForTensor(const paddle::Tensor& tensor,
const std::function<void()>& hook); const std::function<void()>& hook);
void RetainGradForTensor(const paddle::experimental::Tensor& tensor); void RetainGradForTensor(const paddle::Tensor& tensor);
void RegisterBackwardFinalHook(const std::function<void()>& hook); void RegisterBackwardFinalHook(const std::function<void()>& hook);
......
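For readers following the interface change above, a minimal call-site sketch of the hook API with the shortened tensor type; this is not part of the diff, and the header path and the tensor `t` are assumptions.

#include "paddle/fluid/eager/api/utils/hook_utils.h"  // assumed header location

void DemoRegisterHooks(const paddle::Tensor& t) {
  // The gradient hook now takes and returns paddle::Tensor
  // (previously paddle::experimental::Tensor); this one is the identity.
  egr::egr_utils_api::RegisterGradientHookForTensor(
      t, [](const paddle::Tensor& grad) -> paddle::Tensor { return grad; });

  // Reduce hook and grad retention keep their signatures; only the
  // spelled-out tensor type changes.
  egr::egr_utils_api::RegisterReduceHookForTensor(t, []() {});
  egr::egr_utils_api::RetainGradForTensor(t);
}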
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
namespace egr { namespace egr {
namespace egr_utils_api { namespace egr_utils_api {
bool IsLeafTensor(const paddle::experimental::Tensor& target) { bool IsLeafTensor(const paddle::Tensor& target) {
std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(target); std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(target);
if (!grad_node || if (!grad_node ||
std::dynamic_pointer_cast<GradNodeAccumulation>(grad_node)) { std::dynamic_pointer_cast<GradNodeAccumulation>(grad_node)) {
...@@ -37,14 +37,13 @@ bool IsLeafTensor(const paddle::experimental::Tensor& target) { ...@@ -37,14 +37,13 @@ bool IsLeafTensor(const paddle::experimental::Tensor& target) {
return false; return false;
} }
paddle::experimental::Tensor CreateTensorWithValue( paddle::Tensor CreateTensorWithValue(const phi::DDim& ddim,
const phi::DDim& ddim, const paddle::platform::Place& place,
const paddle::platform::Place& place, const phi::DataType& dtype,
const phi::DataType& dtype, const phi::DataLayout& layout,
const phi::DataLayout& layout, float value,
float value, bool is_leaf) {
bool is_leaf) { paddle::Tensor out = paddle::experimental::full(
paddle::experimental::Tensor out = paddle::experimental::full(
phi::vectorize(ddim), paddle::experimental::Scalar(value), dtype, place); phi::vectorize(ddim), paddle::experimental::Scalar(value), dtype, place);
auto meta = EagerUtils::autograd_meta(&out); auto meta = EagerUtils::autograd_meta(&out);
......
...@@ -22,15 +22,14 @@ namespace egr_utils_api { ...@@ -22,15 +22,14 @@ namespace egr_utils_api {
// If and only if the tensor holds an AccumulationNode // If and only if the tensor holds an AccumulationNode
// Then it's treated as a leaf tensor // Then it's treated as a leaf tensor
bool IsLeafTensor(const paddle::experimental::Tensor& target); bool IsLeafTensor(const paddle::Tensor& target);
paddle::experimental::Tensor CreateTensorWithValue( paddle::Tensor CreateTensorWithValue(const phi::DDim& ddim,
const phi::DDim& ddim, const paddle::platform::Place& place,
const paddle::platform::Place& place, const phi::DataType& dtype,
const phi::DataType& dtype, const phi::DataLayout& layout,
const phi::DataLayout& layout, float value,
float value, bool is_leaf = true);
bool is_leaf = true);
} // namespace egr_utils_api } // namespace egr_utils_api
} // namespace egr } // namespace egr
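As a quick illustration of the reshaped declaration above, a hedged usage sketch; the header path and the concrete shape, place, and dtype are assumptions, not taken from this diff.

#include "paddle/fluid/eager/api/utils/tensor_utils.h"  // assumed header location

paddle::Tensor DemoMakeFilledTensor() {
  // 2x3 float32 tensor filled with 1.0f on CPU; is_leaf keeps its default of true.
  return egr::egr_utils_api::CreateTensorWithValue(phi::make_ddim({2, 3}),
                                                   paddle::platform::CPUPlace(),
                                                   phi::DataType::FLOAT32,
                                                   phi::DataLayout::NCHW,
                                                   1.0f);
}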
...@@ -1438,10 +1438,10 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents( ...@@ -1438,10 +1438,10 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
Controller.Instance().GetExpectedPlace(), {}); Controller.Instance().GetExpectedPlace(), {});
// According to fwd_outputs_names // According to fwd_outputs_names
std::vector<paddle::experimental::Tensor> Out0 = std::vector<paddle::Tensor> Out0 =
GetOutputs(outs["Out0"]); GetOutputs(outs["Out0"]);
paddle::experimental::Tensor Out1 = GetOutputs(outs["Out1"][0]); paddle::Tensor Out1 = GetOutputs(outs["Out1"][0]);
std::vector<paddle::experimental::Tensor> Out2 = std::vector<paddle::Tensor> Out2 =
GetOutputs(outs["Out2"]); GetOutputs(outs["Out2"]);
// Grad Node Generation Codes // Grad Node Generation Codes
...@@ -1480,7 +1480,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents( ...@@ -1480,7 +1480,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
if (input.duplicable()) { if (input.duplicable()) {
const char* FWD_INS_ARG_TEMPLATE = const char* FWD_INS_ARG_TEMPLATE =
"const std::vector<paddle::experimental::Tensor>& %s"; "const std::vector<paddle::Tensor>& %s";
input_args_str_list[input_position] = paddle::string::Sprintf( input_args_str_list[input_position] = paddle::string::Sprintf(
FWD_INS_ARG_TEMPLATE, LegalizeVarName(input_name)); FWD_INS_ARG_TEMPLATE, LegalizeVarName(input_name));
amp_function_call_args_str_list[input_position] = amp_function_call_args_str_list[input_position] =
...@@ -1495,13 +1495,13 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents( ...@@ -1495,13 +1495,13 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
for (auto& inplace_pair : forward_inplace_map) { for (auto& inplace_pair : forward_inplace_map) {
if (inplace_pair.second == input_name) { if (inplace_pair.second == input_name) {
flag_find_input_name = true; flag_find_input_name = true;
FWD_INS_ARG_TEMPLATE = "paddle::experimental::Tensor& %s"; FWD_INS_ARG_TEMPLATE = "paddle::Tensor& %s";
break; break;
} }
} }
} }
if (!flag_find_input_name) { if (!flag_find_input_name) {
FWD_INS_ARG_TEMPLATE = "const paddle::experimental::Tensor& %s"; FWD_INS_ARG_TEMPLATE = "const paddle::Tensor& %s";
} }
input_args_str_list[input_position] = paddle::string::Sprintf( input_args_str_list[input_position] = paddle::string::Sprintf(
FWD_INS_ARG_TEMPLATE, LegalizeVarName(input_name)); FWD_INS_ARG_TEMPLATE, LegalizeVarName(input_name));
...@@ -1645,8 +1645,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents( ...@@ -1645,8 +1645,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
// argument(EagerVariable*/vector<EagerVariable*>&), // argument(EagerVariable*/vector<EagerVariable*>&),
// in form of shared_ptr<EagerVariable>/vector<shared_ptr<EagerVariable>> // in form of shared_ptr<EagerVariable>/vector<shared_ptr<EagerVariable>>
if (output.duplicable()) { if (output.duplicable()) {
const char* FWD_NUM_ARG_TEMPLATE = const char* FWD_NUM_ARG_TEMPLATE = ", std::vector<paddle::Tensor*>& %s";
", std::vector<paddle::experimental::Tensor*>& %s";
std::string arg_str = paddle::string::Sprintf( std::string arg_str = paddle::string::Sprintf(
FWD_NUM_ARG_TEMPLATE, LegalizeVarName(output_var_name)); FWD_NUM_ARG_TEMPLATE, LegalizeVarName(output_var_name));
dygraph_function_args_str += arg_str; dygraph_function_args_str += arg_str;
...@@ -1654,7 +1653,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents( ...@@ -1654,7 +1653,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
core_ops_legacy_args_type_info[op_type].push_back("list"); core_ops_legacy_args_type_info[op_type].push_back("list");
} else { } else {
const char* FWD_NUM_ARG_TEMPLATE = ", paddle::experimental::Tensor* %s"; const char* FWD_NUM_ARG_TEMPLATE = ", paddle::Tensor* %s";
std::string arg_str = paddle::string::Sprintf( std::string arg_str = paddle::string::Sprintf(
FWD_NUM_ARG_TEMPLATE, LegalizeVarName(output_var_name)); FWD_NUM_ARG_TEMPLATE, LegalizeVarName(output_var_name));
dygraph_function_args_str += arg_str; dygraph_function_args_str += arg_str;
...@@ -1742,7 +1741,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents( ...@@ -1742,7 +1741,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
std::string amp_logic_str = ""; std::string amp_logic_str = "";
if (in_vars.size() != 0) { if (in_vars.size() != 0) {
const char* AMP_TENSORS_VECTOR_TEMPLATE = const char* AMP_TENSORS_VECTOR_TEMPLATE =
" paddle::small_vector<std::vector<paddle::experimental::Tensor>, " " paddle::small_vector<std::vector<paddle::Tensor>, "
"egr::kSlotSmallVectorSize> " "egr::kSlotSmallVectorSize> "
"amp_tensors_vector = { " "amp_tensors_vector = { "
"%s };\n"; "%s };\n";
...@@ -1897,7 +1896,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents( ...@@ -1897,7 +1896,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
if (op_passing_outs_map[op_type].count(output_name)) { if (op_passing_outs_map[op_type].count(output_name)) {
if (output.dispensable()) { if (output.dispensable()) {
const char* FWD_OUT_TENSORS_TEMPLATE = const char* FWD_OUT_TENSORS_TEMPLATE =
" std::vector<paddle::experimental::Tensor> %s;\n" " std::vector<paddle::Tensor> %s;\n"
" if (outs.count(\"%s\")) " " if (outs.count(\"%s\")) "
"egr::EagerUtils::GetOutputs(outs[\"%s\"], %s);\n" "egr::EagerUtils::GetOutputs(outs[\"%s\"], %s);\n"
" egr::EagerUtils::Output2Result(%s, &%s);\n"; " egr::EagerUtils::Output2Result(%s, &%s);\n";
...@@ -1910,7 +1909,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents( ...@@ -1910,7 +1909,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
output_varname); output_varname);
} else { } else {
const char* FWD_OUT_TENSORS_TEMPLATE = const char* FWD_OUT_TENSORS_TEMPLATE =
" std::vector<paddle::experimental::Tensor> %s;\n" " std::vector<paddle::Tensor> %s;\n"
" egr::EagerUtils::GetOutputs(outs[\"%s\"], %s);\n" " egr::EagerUtils::GetOutputs(outs[\"%s\"], %s);\n"
" egr::EagerUtils::Output2Result(%s, &%s);\n"; " egr::EagerUtils::Output2Result(%s, &%s);\n";
out_tensor_str = paddle::string::Sprintf(FWD_OUT_TENSORS_TEMPLATE, out_tensor_str = paddle::string::Sprintf(FWD_OUT_TENSORS_TEMPLATE,
...@@ -1922,22 +1921,21 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents( ...@@ -1922,22 +1921,21 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
} }
} else { } else {
const char* FWD_OUT_TENSORS_TEMPLATE = const char* FWD_OUT_TENSORS_TEMPLATE =
" std::vector<paddle::experimental::Tensor> %s;\n" " std::vector<paddle::Tensor> %s;\n"
" egr::EagerUtils::GetOutputs(outs[\"%s\"], &%s);\n"; " egr::EagerUtils::GetOutputs(outs[\"%s\"], &%s);\n";
out_tensor_str = paddle::string::Sprintf(FWD_OUT_TENSORS_TEMPLATE, out_tensor_str = paddle::string::Sprintf(FWD_OUT_TENSORS_TEMPLATE,
output_varname, output_varname,
output_name, output_name,
output_varname); output_varname);
} }
return_types[return_position] = return_types[return_position] = "std::vector<paddle::Tensor>";
"std::vector<paddle::experimental::Tensor>";
} else { } else {
if (op_passing_outs_map[op_type].count(output_name)) { if (op_passing_outs_map[op_type].count(output_name)) {
if (output.dispensable()) { if (output.dispensable()) {
const char* FWD_OUT_TENSOR_TEMPLATE = const char* FWD_OUT_TENSOR_TEMPLATE =
" if (outs.count(\"%s\")) " " if (outs.count(\"%s\")) "
"egr::EagerUtils::GetOutput(outs[\"%s\"][0], %s);\n" "egr::EagerUtils::GetOutput(outs[\"%s\"][0], %s);\n"
" paddle::experimental::Tensor& %s = *%s;\n"; " paddle::Tensor& %s = *%s;\n";
out_tensor_str = paddle::string::Sprintf(FWD_OUT_TENSOR_TEMPLATE, out_tensor_str = paddle::string::Sprintf(FWD_OUT_TENSOR_TEMPLATE,
output_name, output_name,
output_name, output_name,
...@@ -1947,7 +1945,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents( ...@@ -1947,7 +1945,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
} else { } else {
const char* FWD_OUT_TENSOR_TEMPLATE = const char* FWD_OUT_TENSOR_TEMPLATE =
" egr::EagerUtils::GetOutput(outs[\"%s\"][0], %s);\n" " egr::EagerUtils::GetOutput(outs[\"%s\"][0], %s);\n"
" paddle::experimental::Tensor& %s = *%s;\n"; " paddle::Tensor& %s = *%s;\n";
out_tensor_str = paddle::string::Sprintf(FWD_OUT_TENSOR_TEMPLATE, out_tensor_str = paddle::string::Sprintf(FWD_OUT_TENSOR_TEMPLATE,
output_name, output_name,
output_var_args_name, output_var_args_name,
...@@ -1973,7 +1971,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents( ...@@ -1973,7 +1971,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
LegalizeVarName(inplace_input_name)); LegalizeVarName(inplace_input_name));
} else { } else {
const char* FWD_OUT_TENSOR_TEMPLATE = const char* FWD_OUT_TENSOR_TEMPLATE =
" paddle::experimental::Tensor %s;\n" " paddle::Tensor %s;\n"
" egr::EagerUtils::GetOutput(outs[\"%s\"][0], &%s);\n"; " egr::EagerUtils::GetOutput(outs[\"%s\"][0], &%s);\n";
out_tensor_str = paddle::string::Sprintf(FWD_OUT_TENSOR_TEMPLATE, out_tensor_str = paddle::string::Sprintf(FWD_OUT_TENSOR_TEMPLATE,
output_varname, output_varname,
...@@ -1981,7 +1979,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents( ...@@ -1981,7 +1979,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
output_varname); output_varname);
} }
} }
return_types[return_position] = "paddle::experimental::Tensor"; return_types[return_position] = "paddle::Tensor";
} }
if (!forward_inplace_map.empty() && if (!forward_inplace_map.empty() &&
...@@ -2160,7 +2158,7 @@ static std::string GenerateSingleOpBase( ...@@ -2160,7 +2158,7 @@ static std::string GenerateSingleOpBase(
} }
generated_grad_function_body += fill_zero_str; generated_grad_function_body += fill_zero_str;
generated_grad_function_body += generated_grad_function_body +=
" paddle::small_vector<std::vector<paddle::experimental::Tensor>, " " paddle::small_vector<std::vector<paddle::Tensor>, "
"egr::kSlotSmallVectorSize> " + "egr::kSlotSmallVectorSize> " +
hooked_grads + " = " + fwd_op_type + hooked_grads + " = " + fwd_op_type +
"GradNodeCompat::ApplyGradientHooks(grads);\n"; "GradNodeCompat::ApplyGradientHooks(grads);\n";
...@@ -2675,7 +2673,7 @@ static std::string GenerateGradNodeCCContents( ...@@ -2675,7 +2673,7 @@ static std::string GenerateGradNodeCCContents(
egr::Controller::Instance().ExpectedPlace(), false, {}); egr::Controller::Instance().ExpectedPlace(), false, {});
} }
vector<vector<paddle::experimental::Tensor>> outputs(outs.size()); vector<vector<paddle::Tensor>> outputs(outs.size());
for(auto& kv : outs) { for(auto& kv : outs) {
outputs["fwd_inputs_name_pos_map[grad_outs_slotname_map[kv.first]]"] = outputs["fwd_inputs_name_pos_map[grad_outs_slotname_map[kv.first]]"] =
GetOutputs(outs["kv.first"]); GetOutputs(outs["kv.first"]);
...@@ -2751,7 +2749,7 @@ static std::string GenerateGradNodeCCContents( ...@@ -2751,7 +2749,7 @@ static std::string GenerateGradNodeCCContents(
const char* BWD_RETURN_TEMPLATE = const char* BWD_RETURN_TEMPLATE =
" const auto& out_metas = OutputMeta();\n" " const auto& out_metas = OutputMeta();\n"
" paddle::small_vector<std::vector<paddle::experimental::Tensor>, " " paddle::small_vector<std::vector<paddle::Tensor>, "
"egr::kSlotSmallVectorSize> outputs(%d);\n" "egr::kSlotSmallVectorSize> outputs(%d);\n"
"%s\n" "%s\n"
" if(NeedComplexToRealConversion()) " " if(NeedComplexToRealConversion()) "
...@@ -2762,10 +2760,10 @@ static std::string GenerateGradNodeCCContents( ...@@ -2762,10 +2760,10 @@ static std::string GenerateGradNodeCCContents(
// [Generation] Get Full Grad Function // [Generation] Get Full Grad Function
const char* GRAD_FUNCTION_TEMPLATE = const char* GRAD_FUNCTION_TEMPLATE =
"paddle::small_vector<std::vector<paddle::experimental::Tensor>, " "paddle::small_vector<std::vector<paddle::Tensor>, "
"egr::kSlotSmallVectorSize> " "egr::kSlotSmallVectorSize> "
"%sGradNodeCompat::operator()(" "%sGradNodeCompat::operator()("
"paddle::small_vector<std::vector<paddle::experimental::Tensor>, " "paddle::small_vector<std::vector<paddle::Tensor>, "
"egr::kSlotSmallVectorSize>& grads, bool " "egr::kSlotSmallVectorSize>& grads, bool "
"create_graph, bool is_new_grad) {\n" "create_graph, bool is_new_grad) {\n"
"%s" "%s"
...@@ -2804,10 +2802,10 @@ static std::string GenerateGradNodeHeaderContents( ...@@ -2804,10 +2802,10 @@ static std::string GenerateGradNodeHeaderContents(
"%sGradNodeCompat \"; }\n" "%sGradNodeCompat \"; }\n"
"\n" "\n"
" virtual " " virtual "
"paddle::small_vector<std::vector<paddle::experimental::Tensor>, " "paddle::small_vector<std::vector<paddle::Tensor>, "
"egr::kSlotSmallVectorSize> " "egr::kSlotSmallVectorSize> "
"operator()(" "operator()("
"paddle::small_vector<std::vector<paddle::experimental::Tensor>, " "paddle::small_vector<std::vector<paddle::Tensor>, "
"egr::kSlotSmallVectorSize>& grads, bool " "egr::kSlotSmallVectorSize>& grads, bool "
"create_graph = false, bool is_new_grad = false) " "create_graph = false, bool is_new_grad = false) "
"override;\n" "override;\n"
...@@ -2883,7 +2881,7 @@ static std::string GenerateGradNodeHeaderContents( ...@@ -2883,7 +2881,7 @@ static std::string GenerateGradNodeHeaderContents(
} }
if (duplicable_tensors.count(tensor_wrapper_name)) { if (duplicable_tensors.count(tensor_wrapper_name)) {
const char* ATTR_TENSOR_WRAPPER_ARG_TEMPLATE = const char* ATTR_TENSOR_WRAPPER_ARG_TEMPLATE =
"const std::vector<paddle::experimental::Tensor>& %s"; "const std::vector<paddle::Tensor>& %s";
tensor_wrapper_arg_str = paddle::string::Sprintf( tensor_wrapper_arg_str = paddle::string::Sprintf(
ATTR_TENSOR_WRAPPER_ARG_TEMPLATE, tensor_wrapper_name); ATTR_TENSOR_WRAPPER_ARG_TEMPLATE, tensor_wrapper_name);
...@@ -2912,7 +2910,7 @@ static std::string GenerateGradNodeHeaderContents( ...@@ -2912,7 +2910,7 @@ static std::string GenerateGradNodeHeaderContents(
} else { } else {
const char* ATTR_TENSOR_WRAPPER_ARG_TEMPLATE = const char* ATTR_TENSOR_WRAPPER_ARG_TEMPLATE =
"const paddle::experimental::Tensor& %s"; "const paddle::Tensor& %s";
tensor_wrapper_arg_str = paddle::string::Sprintf( tensor_wrapper_arg_str = paddle::string::Sprintf(
ATTR_TENSOR_WRAPPER_ARG_TEMPLATE, tensor_wrapper_name); ATTR_TENSOR_WRAPPER_ARG_TEMPLATE, tensor_wrapper_name);
......
...@@ -81,12 +81,12 @@ def ParseArguments(): ...@@ -81,12 +81,12 @@ def ParseArguments():
###################### ######################
# Code Gen Templates # # Code Gen Templates #
###################### ######################
SET_PLAIN_TENSOR_WRAPPER_TEMPLATE = """ void SetTensorWrapper{}(const paddle::experimental::Tensor& {}) {{ SET_PLAIN_TENSOR_WRAPPER_TEMPLATE = """ void SetTensorWrapper{}(const paddle::Tensor& {}) {{
{} = egr::TensorWrapper({}, {}); {} = egr::TensorWrapper({}, {});
}} }}
""" """
SET_VECTOR_TENSOR_WRAPPER_TEMPLATE = """ void SetTensorWrapper{}(const std::vector<paddle::experimental::Tensor>& {}) {{ SET_VECTOR_TENSOR_WRAPPER_TEMPLATE = """ void SetTensorWrapper{}(const std::vector<paddle::Tensor>& {}) {{
for(const auto& eager_tensor : {}) {{ for(const auto& eager_tensor : {}) {{
{}.emplace_back(egr::TensorWrapper(eager_tensor, {})); {}.emplace_back(egr::TensorWrapper(eager_tensor, {}));
}}; }};
...@@ -126,8 +126,8 @@ class {} : public egr::GradNodeBase {{ ...@@ -126,8 +126,8 @@ class {} : public egr::GradNodeBase {{
egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {{}} egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {{}}
~{}() override = default; ~{}() override = default;
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> operator()( virtual paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize> operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>& grads, bool create_graph = false, bool is_new_grad = false) override; paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>& grads, bool create_graph = false, bool is_new_grad = false) override;
std::string name() override {{ return \"{}\"; }} std::string name() override {{ return \"{}\"; }}
void ClearTensorWrappers() override {{ void ClearTensorWrappers() override {{
...@@ -152,7 +152,7 @@ class {} : public egr::GradNodeBase {{ ...@@ -152,7 +152,7 @@ class {} : public egr::GradNodeBase {{
""" """
GRAD_FUNCTION_TEMPLATE = """ GRAD_FUNCTION_TEMPLATE = """
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> {}::operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>& grads, bool create_graph, bool is_new_grad) {{ paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize> {}::operator()(paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>& grads, bool create_graph, bool is_new_grad) {{
VLOG(3) << \"Running AD API GRAD: \" << \"{}\"; VLOG(3) << \"Running AD API GRAD: \" << \"{}\";
// Fill Zero For GradIn Tensors // Fill Zero For GradIn Tensors
{} {}
...@@ -419,7 +419,7 @@ BUMP_INPLACE_VERSION_TEMPLATE = """ ...@@ -419,7 +419,7 @@ BUMP_INPLACE_VERSION_TEMPLATE = """
AMP_LOGIC_TEMPLATE = """ if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {{ AMP_LOGIC_TEMPLATE = """ if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {{
VLOG(5) << "Check and Prepare For AMP"; VLOG(5) << "Check and Prepare For AMP";
{} {}
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_tensors_vector = {}; paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize> amp_tensors_vector = {};
{} {}
{} {}
{} {}
...@@ -431,7 +431,7 @@ AMP_LOGIC_TEMPLATE = """ if (egr::Controller::Instance().GetAMPLevel() != paddl ...@@ -431,7 +431,7 @@ AMP_LOGIC_TEMPLATE = """ if (egr::Controller::Instance().GetAMPLevel() != paddl
""" """
LAYOUT_LOGIC_TEMPLATE = """ LAYOUT_LOGIC_TEMPLATE = """
if (egr::Controller::Instance().UseLayoutAutoTune()) {{ if (egr::Controller::Instance().UseLayoutAutoTune()) {{
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> tensors_vector = {}; paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize> tensors_vector = {};
{} {}
{} {}
VLOG(5) << "Check and Prepare For LAYOUT "<< op_name; VLOG(5) << "Check and Prepare For LAYOUT "<< op_name;
...@@ -443,18 +443,18 @@ LAYOUT_LOGIC_TEMPLATE = """ ...@@ -443,18 +443,18 @@ LAYOUT_LOGIC_TEMPLATE = """
}} }}
""" """
CREATE_PLAIN_OPTIONAL_TENSOR_TEMPLATE = """ CREATE_PLAIN_OPTIONAL_TENSOR_TEMPLATE = """
paddle::optional<paddle::experimental::Tensor> {}_optional; paddle::optional<paddle::Tensor> {}_optional;
if({}.initialized()) {}_optional = paddle::make_optional<paddle::experimental::Tensor>({}); if({}.initialized()) {}_optional = paddle::make_optional<paddle::Tensor>({});
""" """
CREATE_RECOVER_OPTIONAL_TENSOR_TEMPLATE = """ CREATE_RECOVER_OPTIONAL_TENSOR_TEMPLATE = """
paddle::optional<paddle::experimental::Tensor> {}_optional; paddle::optional<paddle::Tensor> {}_optional;
if( {}.impl() ) {}_optional = paddle::make_optional<paddle::experimental::Tensor>({}); if( {}.impl() ) {}_optional = paddle::make_optional<paddle::Tensor>({});
""" """
CREATE_RECOVER_OPTIONAL_VECTOR_TENSOR_TEMPLATE = """ CREATE_RECOVER_OPTIONAL_VECTOR_TENSOR_TEMPLATE = """
paddle::optional<std::vector<paddle::experimental::Tensor>> {}_optional; paddle::optional<std::vector<paddle::Tensor>> {}_optional;
if( !{}.empty() ) {}_optional = paddle::make_optional<std::vector<paddle::experimental::Tensor>>({}); if( !{}.empty() ) {}_optional = paddle::make_optional<std::vector<paddle::Tensor>>({});
""" """
CHECK_BACKWARD_INPLACE_TEMPLATE = """ CHECK_BACKWARD_INPLACE_TEMPLATE = """
...@@ -470,8 +470,8 @@ CHECK_NAN_AND_INF_TEMPLATE = """ if (FLAGS_check_nan_inf) {{ egr::CheckTensorHa ...@@ -470,8 +470,8 @@ CHECK_NAN_AND_INF_TEMPLATE = """ if (FLAGS_check_nan_inf) {{ egr::CheckTensorHa
""" """
inplace_optional_out_type_map = { inplace_optional_out_type_map = {
"Tensor": "paddle::optional<paddle::experimental::Tensor>&", "Tensor": "paddle::optional<paddle::Tensor>&",
"std::vector<Tensor>": "paddle::optional<std::vector<paddle::experimental::Tensor>>&", "std::vector<Tensor>": "paddle::optional<std::vector<paddle::Tensor>>&",
} }
...@@ -1282,9 +1282,11 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): ...@@ -1282,9 +1282,11 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
and forward_inplace_map and forward_inplace_map
and name in forward_inplace_map.keys() and name in forward_inplace_map.keys()
): ):
arg_str = f"paddle::optional<paddle::experimental::Tensor>& {name}" arg_str = f"paddle::optional<paddle::Tensor>& {name}"
else: else:
arg_str = f"const paddle::optional<paddle::experimental::Tensor>& {name}" arg_str = (
f"const paddle::optional<paddle::Tensor>& {name}"
)
amp_tensors_vector_optional_list.append( amp_tensors_vector_optional_list.append(
f"if ({name}) amp_tensors_vector.push_back({{ *{name} }});\n" f"if ({name}) amp_tensors_vector.push_back({{ *{name} }});\n"
) )
...@@ -1303,13 +1305,13 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): ...@@ -1303,13 +1305,13 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
and forward_inplace_map and forward_inplace_map
and name in forward_inplace_map.keys() and name in forward_inplace_map.keys()
): ):
arg_str = f"paddle::experimental::Tensor& {name}" arg_str = f"paddle::Tensor& {name}"
amp_tensors_vector_list.append(f"{{{name}}}") amp_tensors_vector_list.append(f"{{{name}}}")
amp_autocast_list.append( amp_autocast_list.append(
f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n" f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
) )
else: else:
arg_str = f"const paddle::experimental::Tensor& {name}" arg_str = f"const paddle::Tensor& {name}"
amp_tensors_vector_list.append(f"{{{name}}}") amp_tensors_vector_list.append(f"{{{name}}}")
amp_autocast_list.append( amp_autocast_list.append(
f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n" f"auto new_{name} = egr::EagerAmpAutoCast(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
...@@ -1326,9 +1328,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): ...@@ -1326,9 +1328,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
and forward_inplace_map and forward_inplace_map
and name in forward_inplace_map.keys() and name in forward_inplace_map.keys()
): ):
arg_str = f"paddle::optional<std::vector<paddle::experimental::Tensor>>& {name}" arg_str = f"paddle::optional<std::vector<paddle::Tensor>>& {name}"
else: else:
arg_str = f"const paddle::optional<std::vector<paddle::experimental::Tensor>>& {name}" arg_str = f"const paddle::optional<std::vector<paddle::Tensor>>& {name}"
amp_tensors_vector_optional_list.append( amp_tensors_vector_optional_list.append(
f"if ({name}) amp_tensors_vector.push_back( *{name} );\n" f"if ({name}) amp_tensors_vector.push_back( *{name} );\n"
) )
...@@ -1344,11 +1346,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): ...@@ -1344,11 +1346,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
and forward_inplace_map and forward_inplace_map
and name in forward_inplace_map.keys() and name in forward_inplace_map.keys()
): ):
arg_str = ( arg_str = f"std::vector<paddle::Tensor>& {name}"
f"std::vector<paddle::experimental::Tensor>& {name}"
)
else: else:
arg_str = f"const std::vector<paddle::experimental::Tensor>& {name}" arg_str = f"const std::vector<paddle::Tensor>& {name}"
amp_tensors_vector_list.append(f"{name}") amp_tensors_vector_list.append(f"{name}")
amp_autocast_list.append( amp_autocast_list.append(
f"auto new_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n" f"auto new_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n"
...@@ -1432,9 +1432,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): ...@@ -1432,9 +1432,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
rtype rtype
] ]
else: else:
returns_type_list[pos] = "paddle::experimental::Tensor&" returns_type_list[pos] = "paddle::Tensor&"
else: else:
returns_type_list[pos] = "paddle::experimental::Tensor" returns_type_list[pos] = "paddle::Tensor"
else: else:
assert IsVectorTensorType(rtype) assert IsVectorTensorType(rtype)
if ( if (
...@@ -1451,13 +1451,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): ...@@ -1451,13 +1451,9 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
rtype rtype
] ]
else: else:
returns_type_list[ returns_type_list[pos] = "std::vector<paddle::Tensor>&"
pos
] = "std::vector<paddle::experimental::Tensor>&"
else: else:
returns_type_list[ returns_type_list[pos] = "std::vector<paddle::Tensor>"
pos
] = "std::vector<paddle::experimental::Tensor>"
if num_outputs == 1: if num_outputs == 1:
returns_str = returns_list[0] returns_str = returns_list[0]
...@@ -2163,7 +2159,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): ...@@ -2163,7 +2159,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
composite_grad_api_namespace = f"paddle::prim::{namespace}" composite_grad_api_namespace = f"paddle::prim::{namespace}"
grad_function_prepare_str = f""" grad_function_prepare_str = f"""
const auto& out_metas = OutputMeta(); const auto& out_metas = OutputMeta();
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> returns({slot_num_bwd_outputs}); paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize> returns({slot_num_bwd_outputs});
for (int i = 0; i < {slot_num_bwd_outputs}; ++i) {{ for (int i = 0; i < {slot_num_bwd_outputs}; ++i) {{
out_metas[i].size() == 0 ? returns[i].resize(1) : returns[i].resize(out_metas[i].size()); out_metas[i].size() == 0 ? returns[i].resize(1) : returns[i].resize(out_metas[i].size());
}} }}
...@@ -2221,7 +2217,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): ...@@ -2221,7 +2217,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
else: else:
assert IsVectorTensorType(ttype) assert IsVectorTensorType(ttype)
grad_function_prepare_str += f""" grad_function_prepare_str += f"""
std::vector<paddle::experimental::Tensor*> api_output_{out_index}; std::vector<paddle::Tensor*> api_output_{out_index};
api_output_{out_index}.reserve(returns[{fwd_position}].size()); api_output_{out_index}.reserve(returns[{fwd_position}].size());
for (size_t i = 0; i < returns[{fwd_position}].size(); ++i) {{ for (size_t i = 0; i < returns[{fwd_position}].size(); ++i) {{
if (out_metas[{fwd_position}].empty() || out_metas[{fwd_position}][i].IsStopGradient()) {{ if (out_metas[{fwd_position}].empty() || out_metas[{fwd_position}][i].IsStopGradient()) {{
...@@ -2233,7 +2229,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): ...@@ -2233,7 +2229,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
grad_api_args_str = ", ".join(grad_api_args) grad_api_args_str = ", ".join(grad_api_args)
composite_grad_api_args_str = ", ".join(grad_api_args) composite_grad_api_args_str = ", ".join(grad_api_args)
composite_template_name = "<paddle::experimental::Tensor>" composite_template_name = "<paddle::Tensor>"
if is_invoke_forward_api: if is_invoke_forward_api:
autograd_api_out = "auto" autograd_api_out = "auto"
......
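To make the effect of the template edits above concrete, an illustrative pair of generated forward declarations after the rename; the operator names are hypothetical and these lines are not emitted by this diff.

// Single-output op: the generated ad_func now returns paddle::Tensor directly.
paddle::Tensor relu_ad_func(const paddle::Tensor& x);

// Multi-output op: tuples and vectors likewise drop the experimental:: prefix.
std::tuple<paddle::Tensor, std::vector<paddle::Tensor>>
hypothetical_multi_out_ad_func(const paddle::Tensor& x,
                               const std::vector<paddle::Tensor>& ys);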
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
#include "paddle/fluid/eager/grad_node_info.h" #include "paddle/fluid/eager/grad_node_info.h"
namespace egr { namespace egr {
using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta; using AbstractAutogradMeta = paddle::AbstractAutogradMeta;
/** /**
* *
* AutogradMeta is what record the backward info for tensor. When we run * AutogradMeta is what record the backward info for tensor. When we run
...@@ -68,7 +68,7 @@ class AutogradMeta : public AbstractAutogradMeta { ...@@ -68,7 +68,7 @@ class AutogradMeta : public AbstractAutogradMeta {
~AutogradMeta() override = default; ~AutogradMeta() override = default;
const paddle::experimental::Tensor& Grad() const { const paddle::Tensor& Grad() const {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
grad_.get(), grad_.get(),
paddle::platform::errors::InvalidArgument( paddle::platform::errors::InvalidArgument(
...@@ -79,9 +79,9 @@ class AutogradMeta : public AbstractAutogradMeta { ...@@ -79,9 +79,9 @@ class AutogradMeta : public AbstractAutogradMeta {
return *(grad_.get()); return *(grad_.get());
} }
paddle::experimental::Tensor* MutableGrad() { return grad_.get(); } paddle::Tensor* MutableGrad() { return grad_.get(); }
std::weak_ptr<paddle::experimental::Tensor> WeakGrad() { return grad_; } std::weak_ptr<paddle::Tensor> WeakGrad() { return grad_; }
void SetGradNode(const std::shared_ptr<GradNodeBase>& grad_node) { void SetGradNode(const std::shared_ptr<GradNodeBase>& grad_node) {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
...@@ -133,8 +133,7 @@ class AutogradMeta : public AbstractAutogradMeta { ...@@ -133,8 +133,7 @@ class AutogradMeta : public AbstractAutogradMeta {
private: private:
// TODO(jiabin) :Should we use pointer instead of object? // TODO(jiabin) :Should we use pointer instead of object?
std::shared_ptr<paddle::experimental::Tensor> grad_{ std::shared_ptr<paddle::Tensor> grad_{std::make_shared<paddle::Tensor>()};
std::make_shared<paddle::experimental::Tensor>()};
// GradNodeBase is base class of all grad op which is a // GradNodeBase is base class of all grad op which is a
// wrapper for grad op. This class will make grad op easy // wrapper for grad op. This class will make grad op easy
......
...@@ -82,8 +82,7 @@ void EnforceGradNodeHasInput(GradNodeBase* node) { ...@@ -82,8 +82,7 @@ void EnforceGradNodeHasInput(GradNodeBase* node) {
node->name())); node->name()));
} }
void DuplicateCheck(const std::vector<paddle::experimental::Tensor>& inputs, void DuplicateCheck(const std::vector<paddle::Tensor>& inputs, bool is_input) {
bool is_input) {
std::unordered_set<AutogradMeta*> visisted_ins; std::unordered_set<AutogradMeta*> visisted_ins;
std::string msg = is_input ? "inputs" : "outputs"; std::string msg = is_input ? "inputs" : "outputs";
for (auto in : inputs) { for (auto in : inputs) {
...@@ -102,14 +101,14 @@ void DuplicateCheck(const std::vector<paddle::experimental::Tensor>& inputs, ...@@ -102,14 +101,14 @@ void DuplicateCheck(const std::vector<paddle::experimental::Tensor>& inputs,
GeneralGrad* GeneralGrad::general_grad_ = new GeneralGrad(); GeneralGrad* GeneralGrad::general_grad_ = new GeneralGrad();
std::vector<paddle::experimental::Tensor> RunBackward( std::vector<paddle::Tensor> RunBackward(
const std::vector<paddle::experimental::Tensor>& tensors, // output const std::vector<paddle::Tensor>& tensors, // output
const std::vector<paddle::experimental::Tensor>& grad_tensors, const std::vector<paddle::Tensor>& grad_tensors,
bool retain_graph, bool retain_graph,
bool create_graph = false, bool create_graph = false,
const std::vector<paddle::experimental::Tensor>& inputs = {}, const std::vector<paddle::Tensor>& inputs = {},
bool allow_unused = false, bool allow_unused = false,
const std::vector<paddle::experimental::Tensor>& no_grad_vars = {}) { const std::vector<paddle::Tensor>& no_grad_vars = {}) {
VLOG(3) << "Start Backward"; VLOG(3) << "Start Backward";
// *Gradient Hook should happen at node-level // *Gradient Hook should happen at node-level
...@@ -128,7 +127,7 @@ std::vector<paddle::experimental::Tensor> RunBackward( ...@@ -128,7 +127,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
std::unordered_map<GradNodeBase*, std::unique_ptr<GradTensorHolder>> std::unordered_map<GradNodeBase*, std::unique_ptr<GradTensorHolder>>
node_input_buffers_dict; node_input_buffers_dict;
for (size_t i = 0; i < tensors.size(); i++) { for (size_t i = 0; i < tensors.size(); i++) {
const paddle::experimental::Tensor& tensor = tensors[i]; const paddle::Tensor& tensor = tensors[i];
AutogradMeta* auto_grad_meta = EagerUtils::nullable_autograd_meta(tensor); AutogradMeta* auto_grad_meta = EagerUtils::nullable_autograd_meta(tensor);
if (auto_grad_meta == nullptr) { if (auto_grad_meta == nullptr) {
...@@ -255,8 +254,7 @@ std::vector<paddle::experimental::Tensor> RunBackward( ...@@ -255,8 +254,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
VLOG(7) << "Run Backward Kernel with GradTensorHolder."; VLOG(7) << "Run Backward Kernel with GradTensorHolder.";
// Run Pre Backward Node and get outputs // Run Pre Backward Node and get outputs
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
grad_output_tensors = (*node)( grad_output_tensors = (*node)(
node_input_buffer->Buffers(), create_graph, is_general_grad); node_input_buffer->Buffers(), create_graph, is_general_grad);
...@@ -315,8 +313,7 @@ std::vector<paddle::experimental::Tensor> RunBackward( ...@@ -315,8 +313,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
"grad_output_tensors[i].size(), which is: %d. This error may " "grad_output_tensors[i].size(), which is: %d. This error may "
"indicate autoprune or autograd api error. ", "indicate autoprune or autograd api error. ",
grad_output_tensors.size())); grad_output_tensors.size()));
paddle::experimental::Tensor& grad_output_tensor = paddle::Tensor& grad_output_tensor = grad_output_tensors[i][j];
grad_output_tensors[i][j];
if ((!grad_output_tensor.defined() || if ((!grad_output_tensor.defined() ||
!grad_output_tensor.initialized())) { !grad_output_tensor.initialized())) {
...@@ -380,10 +377,9 @@ std::vector<paddle::experimental::Tensor> RunBackward( ...@@ -380,10 +377,9 @@ std::vector<paddle::experimental::Tensor> RunBackward(
return GeneralGrad::Instance().GetResults(inputs, allow_unused, create_graph); return GeneralGrad::Instance().GetResults(inputs, allow_unused, create_graph);
} }
void Backward( void Backward(const std::vector<paddle::Tensor>& tensors, // outputs
const std::vector<paddle::experimental::Tensor>& tensors, // outputs const std::vector<paddle::Tensor>& grad_tensors,
const std::vector<paddle::experimental::Tensor>& grad_tensors, bool retain_graph) {
bool retain_graph) {
VLOG(3) << "Run in Backward"; VLOG(3) << "Run in Backward";
paddle::platform::RecordEvent backward_record_event( paddle::platform::RecordEvent backward_record_event(
"backward", paddle::platform::TracerEventType::UserDefined, 1); "backward", paddle::platform::TracerEventType::UserDefined, 1);
...@@ -391,15 +387,15 @@ void Backward( ...@@ -391,15 +387,15 @@ void Backward(
phi::autotune::AutoTuneStatus::Instance().Update(); phi::autotune::AutoTuneStatus::Instance().Update();
} }
std::vector<paddle::experimental::Tensor> Grad( std::vector<paddle::Tensor> Grad(
const std::vector<paddle::experimental::Tensor>& tensors, // outputs const std::vector<paddle::Tensor>& tensors, // outputs
const std::vector<paddle::experimental::Tensor>& inputs, const std::vector<paddle::Tensor>& inputs,
const std::vector<paddle::experimental::Tensor>& grad_tensors, const std::vector<paddle::Tensor>& grad_tensors,
bool retain_graph, bool retain_graph,
bool create_graph, bool create_graph,
bool only_inputs, bool only_inputs,
bool allow_unused, bool allow_unused,
const std::vector<paddle::experimental::Tensor>& no_grad_vars) { const std::vector<paddle::Tensor>& no_grad_vars) {
VLOG(3) << "Run in Grad"; VLOG(3) << "Run in Grad";
DuplicateCheck(inputs, true /* is_input */); DuplicateCheck(inputs, true /* is_input */);
......
...@@ -22,19 +22,19 @@ namespace egr { ...@@ -22,19 +22,19 @@ namespace egr {
// Backward(): // Backward():
// tensors corresponds to those lived in the backward graph // tensors corresponds to those lived in the backward graph
// each grad_tensors[i] keeps the value for its corresponding tensors[i] // each grad_tensors[i] keeps the value for its corresponding tensors[i]
void Backward(const std::vector<paddle::experimental::Tensor>& tensors, void Backward(const std::vector<paddle::Tensor>& tensors,
const std::vector<paddle::experimental::Tensor>& grad_tensors, const std::vector<paddle::Tensor>& grad_tensors,
bool retain_graph = false); bool retain_graph = false);
std::vector<paddle::experimental::Tensor> Grad( std::vector<paddle::Tensor> Grad(
const std::vector<paddle::experimental::Tensor>& tensors, const std::vector<paddle::Tensor>& tensors,
const std::vector<paddle::experimental::Tensor>& inputs, const std::vector<paddle::Tensor>& inputs,
const std::vector<paddle::experimental::Tensor>& grad_tensors = {}, const std::vector<paddle::Tensor>& grad_tensors = {},
bool retain_graph = false, bool retain_graph = false,
bool create_graph = false, bool create_graph = false,
bool only_inputs = false, bool only_inputs = false,
bool allow_unused = false, bool allow_unused = false,
const std::vector<paddle::experimental::Tensor>& no_grad_vars = {}); const std::vector<paddle::Tensor>& no_grad_vars = {});
// Reserved for gradient() // Reserved for gradient()
......
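The backward.h declarations above are the eager-mode entry points most callers touch. A minimal caller-side sketch follows; the header path and the use of an empty grad_tensors vector (letting the framework seed default gradients) are assumptions for illustration, not something this diff states.

#include <vector>
#include "paddle/fluid/eager/backward.h"  // assumed header for the declarations above

// Hypothetical helper: run the reverse pass starting from a scalar loss.
void BackwardFromLoss(const paddle::Tensor& loss) {
  std::vector<paddle::Tensor> outputs{loss};
  std::vector<paddle::Tensor> grad_tensors;  // left empty in this sketch
  egr::Backward(outputs, grad_tensors, /*retain_graph=*/false);
}

// Hypothetical helper: gradients of outputs w.r.t. a chosen set of inputs.
std::vector<paddle::Tensor> GradOf(const std::vector<paddle::Tensor>& outputs,
                                   const std::vector<paddle::Tensor>& inputs) {
  return egr::Grad(outputs, inputs);  // remaining parameters keep their defaults
}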
...@@ -164,13 +164,11 @@ static void ConstructFwdAndBwdMap( ...@@ -164,13 +164,11 @@ static void ConstructFwdAndBwdMap(
} }
} }
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize> RunCustomOpNode::operator()(paddle::small_vector<std::vector<paddle::Tensor>,
RunCustomOpNode::operator()( kSlotSmallVectorSize>& grads,
paddle::small_vector<std::vector<paddle::experimental::Tensor>, bool create_graph,
kSlotSmallVectorSize>& grads, bool is_new_grad) { // NOLINT
bool create_graph,
bool is_new_grad) { // NOLINT
paddle::CustomOpKernelContext ctx; paddle::CustomOpKernelContext ctx;
auto grad_inputs_name = paddle::framework::OpMetaInfoHelper::GetInputs( auto grad_inputs_name = paddle::framework::OpMetaInfoHelper::GetInputs(
egr::Controller::Instance().GetOpMetaInfoMap().at(op_type_)[1]); egr::Controller::Instance().GetOpMetaInfoMap().at(op_type_)[1]);
...@@ -179,8 +177,7 @@ RunCustomOpNode::operator()( ...@@ -179,8 +177,7 @@ RunCustomOpNode::operator()(
auto map = egr::Controller::Instance().GetCustomEdgesSlotMap().at(op_type_); auto map = egr::Controller::Instance().GetCustomEdgesSlotMap().at(op_type_);
auto kernel_map = egr::Controller::Instance().GetOpMetaInfoMap(); auto kernel_map = egr::Controller::Instance().GetOpMetaInfoMap();
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
tmp_ins(grad_inputs_name.size()); tmp_ins(grad_inputs_name.size());
VLOG(7) << " Prepare Backward inputs of grads with size: " << grads.size() VLOG(7) << " Prepare Backward inputs of grads with size: " << grads.size()
<< ", whose grad_inputs_name size is: " << grad_inputs_name.size(); << ", whose grad_inputs_name size is: " << grad_inputs_name.size();
...@@ -208,11 +205,9 @@ RunCustomOpNode::operator()( ...@@ -208,11 +205,9 @@ RunCustomOpNode::operator()(
} }
VLOG(6) << "Prepare Grad attrs"; VLOG(6) << "Prepare Grad attrs";
ctx.EmplaceBackAttrs(attrs_); ctx.EmplaceBackAttrs(attrs_);
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize> outs(
kSlotSmallVectorSize> OutputMeta().size());
outs(OutputMeta().size()); paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
tmp_outs(grad_outputs_names.size()); tmp_outs(grad_outputs_names.size());
VLOG(6) << "Prepare Grad outputs for size: " << grad_outputs_names.size(); VLOG(6) << "Prepare Grad outputs for size: " << grad_outputs_names.size();
for (size_t i = 0; i < OutputMeta().size(); i++) { for (size_t i = 0; i < OutputMeta().size(); i++) {
...@@ -289,9 +284,8 @@ RunCustomOpNode::operator()( ...@@ -289,9 +284,8 @@ RunCustomOpNode::operator()(
// Prepare Grad outputs // Prepare Grad outputs
size_t no_grad_cnt = 0; size_t no_grad_cnt = 0;
for (size_t i = 0; i < ins_auto_grad_metas.size(); i++) { for (size_t i = 0; i < ins_auto_grad_metas.size(); i++) {
const std::vector<paddle::experimental::Tensor>& in_tensors = const std::vector<paddle::Tensor>& in_tensors = ctx.InputsBetween(
ctx.InputsBetween(ctx.InputRangeAt(i).first, ctx.InputRangeAt(i).first, ctx.InputRangeAt(i).second);
ctx.InputRangeAt(i).second);
if (slot_map[1][0].find(i) != slot_map[1][0].end()) { if (slot_map[1][0].find(i) != slot_map[1][0].end()) {
grad_node->SetGradOutMeta(in_tensors, slot_map[1][0][i]); grad_node->SetGradOutMeta(in_tensors, slot_map[1][0][i]);
...@@ -304,9 +298,8 @@ RunCustomOpNode::operator()( ...@@ -304,9 +298,8 @@ RunCustomOpNode::operator()(
// Prepare Grad inputs with grad of fwd outputs // Prepare Grad inputs with grad of fwd outputs
for (size_t i = 0; i < outs_auto_grad_metas.size(); i++) { for (size_t i = 0; i < outs_auto_grad_metas.size(); i++) {
const std::vector<paddle::experimental::Tensor>& out_tensors = const std::vector<paddle::Tensor>& out_tensors = ctx.OutputsBetweeen(
ctx.OutputsBetweeen(ctx.OutputRangeAt(i).first, ctx.OutputRangeAt(i).first, ctx.OutputRangeAt(i).second);
ctx.OutputRangeAt(i).second);
egr::EagerUtils::SetOutRankWithSlot(&(outs_auto_grad_metas[i]), i); egr::EagerUtils::SetOutRankWithSlot(&(outs_auto_grad_metas[i]), i);
egr::EagerUtils::SetHistory(&(outs_auto_grad_metas[i]), grad_node); egr::EagerUtils::SetHistory(&(outs_auto_grad_metas[i]), grad_node);
grad_node->SetGradInMeta(out_tensors, i); grad_node->SetGradInMeta(out_tensors, i);
...@@ -347,11 +340,10 @@ RunCustomOpNode::operator()( ...@@ -347,11 +340,10 @@ RunCustomOpNode::operator()(
return outs; return outs;
} }
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
RunCustomOpDoubleGradNode::operator()( RunCustomOpDoubleGradNode::operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>&
kSlotSmallVectorSize>& grads, grads,
bool create_graph, bool create_graph,
bool is_new_grad) { // NOLINT bool is_new_grad) { // NOLINT
paddle::CustomOpKernelContext ctx; paddle::CustomOpKernelContext ctx;
...@@ -364,8 +356,7 @@ RunCustomOpDoubleGradNode::operator()( ...@@ -364,8 +356,7 @@ RunCustomOpDoubleGradNode::operator()(
auto map = egr::Controller::Instance().GetCustomEdgesSlotMap().at(op_type_); auto map = egr::Controller::Instance().GetCustomEdgesSlotMap().at(op_type_);
auto kernel_map = egr::Controller::Instance().GetOpMetaInfoMap(); auto kernel_map = egr::Controller::Instance().GetOpMetaInfoMap();
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
tmp_ins(grad_inputs_name.size()); tmp_ins(grad_inputs_name.size());
VLOG(7) << " Prepare Backward inputs of grads with size: " << grads.size() VLOG(7) << " Prepare Backward inputs of grads with size: " << grads.size()
<< ", whose grad_inputs_name size is: " << grad_inputs_name.size(); << ", whose grad_inputs_name size is: " << grad_inputs_name.size();
...@@ -395,11 +386,9 @@ RunCustomOpDoubleGradNode::operator()( ...@@ -395,11 +386,9 @@ RunCustomOpDoubleGradNode::operator()(
} }
VLOG(6) << "Prepare Grad attrs"; VLOG(6) << "Prepare Grad attrs";
ctx.EmplaceBackAttrs(attrs_); ctx.EmplaceBackAttrs(attrs_);
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize> outs(
kSlotSmallVectorSize> OutputMeta().size());
outs(OutputMeta().size()); paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
tmp_outs(grad_outputs_names.size()); tmp_outs(grad_outputs_names.size());
VLOG(6) << "Prepare Grad outputs for size: " << grad_outputs_names.size(); VLOG(6) << "Prepare Grad outputs for size: " << grad_outputs_names.size();
......
...@@ -37,10 +37,10 @@ class RunCustomOpNode : public GradNodeBase { ...@@ -37,10 +37,10 @@ class RunCustomOpNode : public GradNodeBase {
} }
// Functor: perform backward computations // Functor: perform backward computations
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>, virtual paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize> kSlotSmallVectorSize>
operator()( // NOLINT operator()( // NOLINT
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false, bool create_graph = false,
bool is_new_grad = false) // NOLINT bool is_new_grad = false) // NOLINT
...@@ -51,7 +51,7 @@ class RunCustomOpNode : public GradNodeBase { ...@@ -51,7 +51,7 @@ class RunCustomOpNode : public GradNodeBase {
} }
static std::vector<egr::TensorWrapper> ConstructTensorWrapper( static std::vector<egr::TensorWrapper> ConstructTensorWrapper(
const std::vector<paddle::experimental::Tensor>& fwd_var) { const std::vector<paddle::Tensor>& fwd_var) {
std::vector<egr::TensorWrapper> res; std::vector<egr::TensorWrapper> res;
for (auto const& var : fwd_var) { for (auto const& var : fwd_var) {
res.emplace_back(var); res.emplace_back(var);
...@@ -59,9 +59,9 @@ class RunCustomOpNode : public GradNodeBase { ...@@ -59,9 +59,9 @@ class RunCustomOpNode : public GradNodeBase {
return res; return res;
} }
static std::vector<paddle::experimental::Tensor> Recover( static std::vector<paddle::Tensor> Recover(
std::vector<egr::TensorWrapper>* fwd_var) { std::vector<egr::TensorWrapper>* fwd_var) {
std::vector<paddle::experimental::Tensor> res; std::vector<paddle::Tensor> res;
for (size_t i = 0; i < fwd_var->size(); i++) { for (size_t i = 0; i < fwd_var->size(); i++) {
res.emplace_back(fwd_var->at(i).recover()); res.emplace_back(fwd_var->at(i).recover());
} }
...@@ -107,10 +107,10 @@ class RunCustomOpDoubleGradNode : public GradNodeBase { ...@@ -107,10 +107,10 @@ class RunCustomOpDoubleGradNode : public GradNodeBase {
} }
// Functor: perform backward computations // Functor: perform backward computations
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>, virtual paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize> kSlotSmallVectorSize>
operator()( // NOLINT operator()( // NOLINT
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false, bool create_graph = false,
bool is_new_grad = false) // NOLINT bool is_new_grad = false) // NOLINT
...@@ -122,7 +122,7 @@ class RunCustomOpDoubleGradNode : public GradNodeBase { ...@@ -122,7 +122,7 @@ class RunCustomOpDoubleGradNode : public GradNodeBase {
} }
static std::vector<egr::TensorWrapper> ConstructTensorWrapper( static std::vector<egr::TensorWrapper> ConstructTensorWrapper(
const std::vector<paddle::experimental::Tensor>& fwd_var) { const std::vector<paddle::Tensor>& fwd_var) {
std::vector<egr::TensorWrapper> res; std::vector<egr::TensorWrapper> res;
for (auto const& var : fwd_var) { for (auto const& var : fwd_var) {
res.emplace_back(var); res.emplace_back(var);
...@@ -130,9 +130,9 @@ class RunCustomOpDoubleGradNode : public GradNodeBase { ...@@ -130,9 +130,9 @@ class RunCustomOpDoubleGradNode : public GradNodeBase {
return res; return res;
} }
static std::vector<paddle::experimental::Tensor> Recover( static std::vector<paddle::Tensor> Recover(
std::vector<egr::TensorWrapper>* fwd_var) { std::vector<egr::TensorWrapper>* fwd_var) {
std::vector<paddle::experimental::Tensor> res; std::vector<paddle::Tensor> res;
for (size_t i = 0; i < fwd_var->size(); i++) { for (size_t i = 0; i < fwd_var->size(); i++) {
res.emplace_back(fwd_var->at(i).recover()); res.emplace_back(fwd_var->at(i).recover());
} }
......
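The ConstructTensorWrapper/Recover pair above is the save-and-restore path the custom-op grad nodes use for forward tensors. A rough sketch of that round trip, assuming the two static helpers are publicly callable exactly as declared (includes omitted):

// Wrap forward-time inputs so the grad node can stash them until backward runs.
std::vector<egr::TensorWrapper> SaveForBackward(
    const std::vector<paddle::Tensor>& fwd_inputs) {
  return egr::RunCustomOpNode::ConstructTensorWrapper(fwd_inputs);
}

// Rebuild plain paddle::Tensor objects inside the backward functor.
std::vector<paddle::Tensor> RestoreInBackward(
    std::vector<egr::TensorWrapper>* saved) {
  return egr::RunCustomOpNode::Recover(saved);
}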
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
namespace egr { namespace egr {
static inline bool NeedCast(const paddle::experimental::Tensor& tensor, static inline bool NeedCast(const paddle::Tensor& tensor,
const paddle::experimental::DataType& dst_dtype) { const paddle::experimental::DataType& dst_dtype) {
auto place = tensor.place(); auto place = tensor.place();
auto data_type = tensor.dtype(); auto data_type = tensor.dtype();
...@@ -45,10 +45,9 @@ static inline bool NeedCast(const paddle::experimental::Tensor& tensor, ...@@ -45,10 +45,9 @@ static inline bool NeedCast(const paddle::experimental::Tensor& tensor,
return false; return false;
} }
inline paddle::experimental::Tensor Cast( inline paddle::Tensor Cast(const paddle::Tensor& input,
const paddle::experimental::Tensor& input, const paddle::experimental::DataType& dst_dtype,
const paddle::experimental::DataType& dst_dtype, const bool trace_backward = true) {
const bool trace_backward = true) {
if (input.is_sparse_coo_tensor() || input.is_sparse_csr_tensor()) { if (input.is_sparse_coo_tensor() || input.is_sparse_csr_tensor()) {
if (trace_backward) { if (trace_backward) {
return sparse::cast_ad_func( return sparse::cast_ad_func(
...@@ -66,16 +65,16 @@ inline paddle::experimental::Tensor Cast( ...@@ -66,16 +65,16 @@ inline paddle::experimental::Tensor Cast(
} }
} }
inline std::vector<paddle::experimental::Tensor> EagerAmpAutoCasts( inline std::vector<paddle::Tensor> EagerAmpAutoCasts(
const std::string& inputs_name, const std::string& inputs_name,
const std::vector<paddle::experimental::Tensor>& inputs, const std::vector<paddle::Tensor>& inputs,
const paddle::experimental::DataType& dst_dtype, const paddle::experimental::DataType& dst_dtype,
std::string op_name, std::string op_name,
bool trace_backward = true) { bool trace_backward = true) {
VLOG(6) << "AMP AmpAutoCasts:" VLOG(6) << "AMP AmpAutoCasts:"
<< " inputs(" << inputs_name << ") dst_dtype(" << " inputs(" << inputs_name << ") dst_dtype("
<< phi::DataTypeToString(dst_dtype) << ")."; << phi::DataTypeToString(dst_dtype) << ").";
std::vector<paddle::experimental::Tensor> inputs_casted; std::vector<paddle::Tensor> inputs_casted;
for (auto& input : inputs) { for (auto& input : inputs) {
if (NeedCast(input, dst_dtype)) { if (NeedCast(input, dst_dtype)) {
inputs_casted.emplace_back(std::move(Cast(input, dst_dtype))); inputs_casted.emplace_back(std::move(Cast(input, dst_dtype)));
...@@ -86,9 +85,9 @@ inline std::vector<paddle::experimental::Tensor> EagerAmpAutoCasts( ...@@ -86,9 +85,9 @@ inline std::vector<paddle::experimental::Tensor> EagerAmpAutoCasts(
return inputs_casted; return inputs_casted;
} }
inline paddle::experimental::Tensor EagerAmpAutoCast( inline paddle::Tensor EagerAmpAutoCast(
const std::string& input_name, const std::string& input_name,
const paddle::experimental::Tensor& input, const paddle::Tensor& input,
const paddle::experimental::DataType& dst_dtype, const paddle::experimental::DataType& dst_dtype,
const std::string& op_name, const std::string& op_name,
bool trace_backward = true) { bool trace_backward = true) {
...@@ -119,9 +118,9 @@ inline paddle::experimental::Tensor EagerAmpAutoCast( ...@@ -119,9 +118,9 @@ inline paddle::experimental::Tensor EagerAmpAutoCast(
return input; return input;
} }
inline paddle::optional<paddle::experimental::Tensor> EagerAmpAutoCast( inline paddle::optional<paddle::Tensor> EagerAmpAutoCast(
const std::string& input_name, const std::string& input_name,
const paddle::optional<paddle::experimental::Tensor>& input, const paddle::optional<paddle::Tensor>& input,
const paddle::experimental::DataType& dst_dtype, const paddle::experimental::DataType& dst_dtype,
const std::string& op_name, const std::string& op_name,
bool trace_backward = true) { bool trace_backward = true) {
...@@ -132,10 +131,9 @@ inline paddle::optional<paddle::experimental::Tensor> EagerAmpAutoCast( ...@@ -132,10 +131,9 @@ inline paddle::optional<paddle::experimental::Tensor> EagerAmpAutoCast(
return paddle::none; return paddle::none;
} }
inline paddle::optional<std::vector<paddle::experimental::Tensor>> inline paddle::optional<std::vector<paddle::Tensor>> EagerAmpAutoCasts(
EagerAmpAutoCasts(
const std::string& inputs_name, const std::string& inputs_name,
const paddle::optional<std::vector<paddle::experimental::Tensor>>& inputs, const paddle::optional<std::vector<paddle::Tensor>>& inputs,
const paddle::experimental::DataType& dst_dtype, const paddle::experimental::DataType& dst_dtype,
std::string op_name, std::string op_name,
bool trace_backward = true) { bool trace_backward = true) {
...@@ -143,7 +141,7 @@ EagerAmpAutoCasts( ...@@ -143,7 +141,7 @@ EagerAmpAutoCasts(
return EagerAmpAutoCasts( return EagerAmpAutoCasts(
inputs_name, *inputs, dst_dtype, op_name, trace_backward); inputs_name, *inputs, dst_dtype, op_name, trace_backward);
} }
return paddle::optional<std::vector<paddle::experimental::Tensor>>(); return paddle::optional<std::vector<paddle::Tensor>>();
} }
} // namespace egr } // namespace egr
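The EagerAmpAutoCast family above is what casts individual op inputs when AMP is active. A hedged single-input sketch; the input slot name, op name, and destination dtype are placeholders chosen only for illustration (includes omitted):

// Cast one input toward the AMP destination dtype; the helper decides
// internally whether a cast is actually needed and may return the input as-is.
paddle::Tensor AmpCastInput(const paddle::Tensor& x) {
  const auto dst_dtype = paddle::experimental::DataType::FLOAT16;
  return egr::EagerAmpAutoCast("X", x, dst_dtype, "matmul_v2");
}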
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
#include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_info.h"
namespace egr { namespace egr {
inline bool NeedTransLayout( inline bool NeedTransLayout(
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
const phi::DataLayout& layout) { const phi::DataLayout& layout) {
for (size_t i = 0; i < tensors_vector.size(); i++) { for (size_t i = 0; i < tensors_vector.size(); i++) {
...@@ -35,7 +35,7 @@ inline bool NeedTransLayout( ...@@ -35,7 +35,7 @@ inline bool NeedTransLayout(
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& tensors_vector) { kSlotSmallVectorSize>& tensors_vector) {
// For agnostic op like add, relu, exp // For agnostic op like add, relu, exp
auto first_layout = tensors_vector[0][0].layout(); auto first_layout = tensors_vector[0][0].layout();
...@@ -63,7 +63,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( ...@@ -63,7 +63,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
template <typename T> template <typename T>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
T* attr) { T* attr) {
// For lightly op like reduce // For lightly op like reduce
...@@ -78,7 +78,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( ...@@ -78,7 +78,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
template <typename T1, typename T2> template <typename T1, typename T2>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
T1* axis, T1* axis,
T2* keep_dim) { T2* keep_dim) {
...@@ -89,7 +89,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( ...@@ -89,7 +89,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
template <> template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
std::string* attr) { std::string* attr) {
// Heavily op with (string) data_format, data_layout // Heavily op with (string) data_format, data_layout
...@@ -142,7 +142,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( ...@@ -142,7 +142,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
template <> template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
std::vector<int>* attr) { std::vector<int>* attr) {
// lightly transpose // lightly transpose
...@@ -167,7 +167,7 @@ template <> ...@@ -167,7 +167,7 @@ template <>
inline std::shared_ptr<EagerLayoutTransformer> inline std::shared_ptr<EagerLayoutTransformer>
EagerLayoutAutotune<paddle::experimental::Scalar, bool>( EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
paddle::experimental::Scalar* axis, paddle::experimental::Scalar* axis,
bool* keep_dim) { bool* keep_dim) {
...@@ -191,7 +191,7 @@ EagerLayoutAutotune<paddle::experimental::Scalar, bool>( ...@@ -191,7 +191,7 @@ EagerLayoutAutotune<paddle::experimental::Scalar, bool>(
template <> template <>
inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>( inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune<int, int>(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
int* start_axis, int* start_axis,
int* stop_axis) { int* stop_axis) {
...@@ -215,7 +215,7 @@ template <> ...@@ -215,7 +215,7 @@ template <>
inline std::shared_ptr<EagerLayoutTransformer> inline std::shared_ptr<EagerLayoutTransformer>
EagerLayoutAutotune<paddle::experimental::Scalar>( EagerLayoutAutotune<paddle::experimental::Scalar>(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
paddle::experimental::Scalar* axis) { paddle::experimental::Scalar* axis) {
if (DesiredLayout() == phi::DataLayout::UNDEFINED) { if (DesiredLayout() == phi::DataLayout::UNDEFINED) {
......
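The EagerLayoutAutotune overloads above dispatch on the attribute types an op carries: no attribute, a generic lightly-sensitive attribute, a data_format string for heavily-sensitive ops, a transpose perm, flatten axes, and so on. A sketch of the data_format path; the op name, slot contents, and the assumption that kSlotSmallVectorSize is the egr-namespace constant used unqualified throughout these headers are all illustrative:

// Pick a transformer for a heavily layout-sensitive op that carries a
// string data_format attribute.
std::shared_ptr<egr::EagerLayoutTransformer> PickTransformerForConv(
    const paddle::Tensor& x, const paddle::Tensor& filter) {
  paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
      tensors_vector;
  tensors_vector.push_back({x});
  tensors_vector.push_back({filter});
  std::string data_format = "NCHW";  // placeholder attribute value
  return egr::EagerLayoutAutotune("conv2d", tensors_vector, &data_format);
}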
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/tensor_utils.h"
namespace egr { namespace egr {
inline paddle::experimental::Tensor EagerTraceTransposeOp( inline paddle::Tensor EagerTraceTransposeOp(const phi::DataLayout layout,
const phi::DataLayout layout, const paddle::experimental::Tensor& in) { const paddle::Tensor& in) {
VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout VLOG(4) << "AutoTune Transpose from " << in.layout() << " to " << layout
<< ", tensor's dim size is " << in.shape().size(); << ", tensor's dim size is " << in.shape().size();
if (in.shape().size() != 4) { if (in.shape().size() != 4) {
...@@ -47,7 +47,7 @@ inline phi::DataLayout DefaultLayout() { ...@@ -47,7 +47,7 @@ inline phi::DataLayout DefaultLayout() {
return paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout(); return paddle::imperative::LayoutAutoTune::Instance().GetDefaultLayout();
} }
inline void UpdateLayout(paddle::experimental::Tensor* out_tensor, inline void UpdateLayout(paddle::Tensor* out_tensor,
const phi::DataLayout layout) { const phi::DataLayout layout) {
if (out_tensor->layout() != layout) { if (out_tensor->layout() != layout) {
VLOG(4) << "Update out_tensor's layout from " << out_tensor->layout() VLOG(4) << "Update out_tensor's layout from " << out_tensor->layout()
...@@ -58,7 +58,7 @@ inline void UpdateLayout(paddle::experimental::Tensor* out_tensor, ...@@ -58,7 +58,7 @@ inline void UpdateLayout(paddle::experimental::Tensor* out_tensor,
} }
} }
inline void DealWithShapeOp(paddle::experimental::Tensor* out_tensor, inline void DealWithShapeOp(paddle::Tensor* out_tensor,
const phi::DataLayout layout, const phi::DataLayout layout,
int dim_size) { int dim_size) {
auto des_layout = DesiredLayout(); auto des_layout = DesiredLayout();
...@@ -114,7 +114,7 @@ class EagerLayoutTransformer { ...@@ -114,7 +114,7 @@ class EagerLayoutTransformer {
explicit EagerLayoutTransformer( explicit EagerLayoutTransformer(
const std::string& op_name, const std::string& op_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& tensors_vector, kSlotSmallVectorSize>& tensors_vector,
const Layout final_layout = Layout::UNDEFINED) const Layout final_layout = Layout::UNDEFINED)
: op_name_(op_name), final_layout_(final_layout), dim_size_(1) { : op_name_(op_name), final_layout_(final_layout), dim_size_(1) {
...@@ -123,8 +123,8 @@ class EagerLayoutTransformer { ...@@ -123,8 +123,8 @@ class EagerLayoutTransformer {
virtual ~EagerLayoutTransformer() {} virtual ~EagerLayoutTransformer() {}
virtual paddle::experimental::Tensor TransInTensor( virtual paddle::Tensor TransInTensor(const std::string& in_name,
const std::string& in_name, const paddle::experimental::Tensor& in) { const paddle::Tensor& in) {
// update in shape size // update in shape size
dim_size_ = in.shape().size(); dim_size_ = in.shape().size();
bool need_trans = bool need_trans =
...@@ -140,27 +140,23 @@ class EagerLayoutTransformer { ...@@ -140,27 +140,23 @@ class EagerLayoutTransformer {
return in; return in;
} }
virtual paddle::optional<paddle::experimental::Tensor> TransInTensor( virtual paddle::optional<paddle::Tensor> TransInTensor(
const std::string& in_name, const std::string& in_name, const paddle::optional<paddle::Tensor>& in) {
const paddle::optional<paddle::experimental::Tensor>& in) {
return in ? TransInTensor(in_name, *in) : in; return in ? TransInTensor(in_name, *in) : in;
} }
virtual std::vector<paddle::experimental::Tensor> TransInTensors( virtual std::vector<paddle::Tensor> TransInTensors(
const std::string& in_name, const std::string& in_name, const std::vector<paddle::Tensor>& in) {
const std::vector<paddle::experimental::Tensor>& in) {
return in; return in;
} }
virtual paddle::optional<std::vector<paddle::experimental::Tensor>> virtual paddle::optional<std::vector<paddle::Tensor>> TransInTensors(
TransInTensors(
const std::string& in_name, const std::string& in_name,
const paddle::optional<std::vector<paddle::experimental::Tensor>>& in) { const paddle::optional<std::vector<paddle::Tensor>>& in) {
return (in ? TransInTensors(in_name, *in) : in); return (in ? TransInTensors(in_name, *in) : in);
} }
virtual void SetOutTensorLayout( virtual void SetOutTensorLayout(std::vector<paddle::Tensor>* out_tensor) {
std::vector<paddle::experimental::Tensor>* out_tensor) {
bool update_layout = !(final_layout_ == Layout::UNDEFINED); bool update_layout = !(final_layout_ == Layout::UNDEFINED);
if (update_layout) { if (update_layout) {
for (size_t i = 0; i < out_tensor->size(); i++) { for (size_t i = 0; i < out_tensor->size(); i++) {
...@@ -172,16 +168,16 @@ class EagerLayoutTransformer { ...@@ -172,16 +168,16 @@ class EagerLayoutTransformer {
} }
virtual void SetOutTensorLayout( virtual void SetOutTensorLayout(
paddle::optional<paddle::experimental::Tensor>* out_tensor) { paddle::optional<paddle::Tensor>* out_tensor) {
VLOG(4) << "AutoTune out tensor is optional"; VLOG(4) << "AutoTune out tensor is optional";
} }
virtual void SetOutTensorLayout( virtual void SetOutTensorLayout(
paddle::optional<std::vector<paddle::experimental::Tensor>>* out_tensor) { paddle::optional<std::vector<paddle::Tensor>>* out_tensor) {
VLOG(4) << "AutoTune out tensor is optional"; VLOG(4) << "AutoTune out tensor is optional";
} }
virtual void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { virtual void SetOutTensorLayout(paddle::Tensor* out_tensor) {
if (op_name_ == "shape") { if (op_name_ == "shape") {
return DealWithShapeOp(out_tensor, final_layout_, dim_size_); return DealWithShapeOp(out_tensor, final_layout_, dim_size_);
} }
...@@ -206,8 +202,8 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { ...@@ -206,8 +202,8 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
*layout = phi::DataLayoutToString(DesiredLayout()); *layout = phi::DataLayoutToString(DesiredLayout());
} }
paddle::experimental::Tensor TransInTensor( paddle::Tensor TransInTensor(const std::string& in_name,
const std::string& in_name, const paddle::experimental::Tensor& in) { const paddle::Tensor& in) {
if (heavily_input_.count(in_name) != 0 && in.layout() != desired_layout_) { if (heavily_input_.count(in_name) != 0 && in.layout() != desired_layout_) {
auto out_tensor = EagerTraceTransposeOp(desired_layout_, in); auto out_tensor = EagerTraceTransposeOp(desired_layout_, in);
return out_tensor; return out_tensor;
...@@ -215,19 +211,17 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { ...@@ -215,19 +211,17 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
return in; return in;
} }
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { void SetOutTensorLayout(paddle::Tensor* out_tensor) {
UpdateLayout(out_tensor, desired_layout_); UpdateLayout(out_tensor, desired_layout_);
} }
void SetOutTensorLayout( void SetOutTensorLayout(std::vector<paddle::Tensor*>* out_tensor) {
std::vector<paddle::experimental::Tensor*>* out_tensor) {
for (size_t i = 0; i < out_tensor->size(); i++) { for (size_t i = 0; i < out_tensor->size(); i++) {
SetOutTensorLayout((*out_tensor)[i]); SetOutTensorLayout((*out_tensor)[i]);
} }
} }
void SetOutTensorLayout( void SetOutTensorLayout(std::vector<paddle::Tensor>* out_tensor) {
std::vector<paddle::experimental::Tensor>* out_tensor) {
for (size_t i = 0; i < out_tensor->size(); i++) { for (size_t i = 0; i < out_tensor->size(); i++) {
if ((*out_tensor)[i].layout() != desired_layout_) { if ((*out_tensor)[i].layout() != desired_layout_) {
VLOG(4) << "Update out_tensor's layout from " VLOG(4) << "Update out_tensor's layout from "
...@@ -256,8 +250,8 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { ...@@ -256,8 +250,8 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
} }
// transpose from desired to default // transpose from desired to default
paddle::experimental::Tensor TransInTensor( paddle::Tensor TransInTensor(const std::string& in_name,
const std::string& in_name, const paddle::experimental::Tensor& in) { const paddle::Tensor& in) {
std::string input_layout = phi::DataLayoutToString(in.layout()); std::string input_layout = phi::DataLayoutToString(in.layout());
auto default_layout = DefaultLayout(); auto default_layout = DefaultLayout();
if (final_layout_ == input_layout && in.shape().size() == 4) { if (final_layout_ == input_layout && in.shape().size() == 4) {
...@@ -270,10 +264,9 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { ...@@ -270,10 +264,9 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
return in; return in;
} }
virtual std::vector<paddle::experimental::Tensor> TransInTensors( virtual std::vector<paddle::Tensor> TransInTensors(
const std::string& in_name, const std::string& in_name, const std::vector<paddle::Tensor>& in) {
const std::vector<paddle::experimental::Tensor>& in) { std::vector<paddle::Tensor> result;
std::vector<paddle::experimental::Tensor> result;
auto desired_layout = DesiredLayout(); auto desired_layout = DesiredLayout();
auto default_layout = DefaultLayout(); auto default_layout = DefaultLayout();
for (size_t i = 0; i < in.size(); i++) { for (size_t i = 0; i < in.size(); i++) {
...@@ -292,19 +285,17 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { ...@@ -292,19 +285,17 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
return result; return result;
} }
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { void SetOutTensorLayout(paddle::Tensor* out_tensor) {
UpdateLayout(out_tensor, DefaultLayout()); UpdateLayout(out_tensor, DefaultLayout());
} }
void SetOutTensorLayout( void SetOutTensorLayout(std::vector<paddle::Tensor*>* out_tensor) {
std::vector<paddle::experimental::Tensor*>* out_tensor) {
for (size_t i = 0; i < out_tensor->size(); i++) { for (size_t i = 0; i < out_tensor->size(); i++) {
SetOutTensorLayout((*out_tensor)[i]); SetOutTensorLayout((*out_tensor)[i]);
} }
} }
void SetOutTensorLayout( void SetOutTensorLayout(std::vector<paddle::Tensor>* out_tensor) {
std::vector<paddle::experimental::Tensor>* out_tensor) {
auto default_layout = DefaultLayout(); auto default_layout = DefaultLayout();
for (size_t i = 0; i < out_tensor->size(); i++) { for (size_t i = 0; i < out_tensor->size(); i++) {
phi::DenseTensorUtils::GetMutableMeta( phi::DenseTensorUtils::GetMutableMeta(
...@@ -336,12 +327,12 @@ class EagerTransposeOpTransformer ...@@ -336,12 +327,12 @@ class EagerTransposeOpTransformer
(*axis)[3] = perm[(*axis)[3]]; (*axis)[3] = perm[(*axis)[3]];
} }
paddle::experimental::Tensor TransInTensor( paddle::Tensor TransInTensor(const std::string& in_name,
const std::string& in_name, const paddle::experimental::Tensor& in) { const paddle::Tensor& in) {
return in; return in;
} }
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { void SetOutTensorLayout(paddle::Tensor* out_tensor) {
UpdateLayout(out_tensor, DefaultLayout()); UpdateLayout(out_tensor, DefaultLayout());
} }
}; };
...@@ -362,7 +353,7 @@ class EagerArgmaxOpTransformer ...@@ -362,7 +353,7 @@ class EagerArgmaxOpTransformer
(*axis) = static_cast<paddle::experimental::Scalar>(perm[axes]); (*axis) = static_cast<paddle::experimental::Scalar>(perm[axes]);
} }
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { void SetOutTensorLayout(paddle::Tensor* out_tensor) {
UpdateLayout(out_tensor, DesiredLayout()); UpdateLayout(out_tensor, DesiredLayout());
} }
}; };
...@@ -376,12 +367,12 @@ class EagerFlattenOpTransformer ...@@ -376,12 +367,12 @@ class EagerFlattenOpTransformer
} }
// transpose from NHWC to NCHW // transpose from NHWC to NCHW
paddle::experimental::Tensor TransInTensor( paddle::Tensor TransInTensor(const std::string& in_name,
const std::string& in_name, const paddle::experimental::Tensor& in) { const paddle::Tensor& in) {
return in; return in;
} }
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { void SetOutTensorLayout(paddle::Tensor* out_tensor) {
UpdateLayout(out_tensor, DefaultLayout()); UpdateLayout(out_tensor, DefaultLayout());
} }
}; };
...@@ -403,13 +394,12 @@ class EagerConcatOpTransformer ...@@ -403,13 +394,12 @@ class EagerConcatOpTransformer
(*axis) = static_cast<paddle::experimental::Scalar>(perm[axes]); (*axis) = static_cast<paddle::experimental::Scalar>(perm[axes]);
} }
virtual std::vector<paddle::experimental::Tensor> TransInTensors( virtual std::vector<paddle::Tensor> TransInTensors(
const std::string& in_name, const std::string& in_name, const std::vector<paddle::Tensor>& in) {
const std::vector<paddle::experimental::Tensor>& in) {
return in; return in;
} }
void SetOutTensorLayout(paddle::experimental::Tensor* out_tensor) { void SetOutTensorLayout(paddle::Tensor* out_tensor) {
UpdateLayout(out_tensor, DesiredLayout()); UpdateLayout(out_tensor, DesiredLayout());
} }
}; };
......
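EagerTraceTransposeOp above is the primitive the transformer classes fall back on whenever an input's layout differs from the target; it checks the tensor rank at the top of its body before doing anything. A small illustrative wrapper (the NCHW target and function name are placeholders):

// Bring a tensor to NCHW when the autotuner has settled on that layout.
paddle::Tensor ToNCHW(const paddle::Tensor& in) {
  if (in.layout() == phi::DataLayout::NCHW) {
    return in;  // already in the target layout, nothing to do
  }
  return egr::EagerTraceTransposeOp(phi::DataLayout::NCHW, in);
}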
...@@ -26,7 +26,7 @@ namespace egr { ...@@ -26,7 +26,7 @@ namespace egr {
/** /**
* VariableCompatTensor class is used by Eager mode for now. It's painful to * VariableCompatTensor class is used by Eager mode for now. It's painful to
* do this in Eager Mode, the better choice is to design the special Tensor * do this in Eager Mode, the better choice is to design the special Tensor
* directly in phi and use it in paddle::experimental::Tensor. * directly in phi and use it in paddle::Tensor.
* However, we have some special operators, and they use special input variable * However, we have some special operators, and they use special input variable
* types, such as vector<string>, unordered_map<wstring, int>, these types cannot * types, such as vector<string>, unordered_map<wstring, int>, these types cannot
* be covered by DenseTensor or SparseTensor. So, we have to provide a compatible * be covered by DenseTensor or SparseTensor. So, we have to provide a compatible
...@@ -178,22 +178,22 @@ class VariableCompatTensor ...@@ -178,22 +178,22 @@ class VariableCompatTensor
std::shared_ptr<Placeholder> holder_; std::shared_ptr<Placeholder> holder_;
}; };
inline bool IsVariableCompatTensor(const paddle::experimental::Tensor& tensor) { inline bool IsVariableCompatTensor(const paddle::Tensor& tensor) {
return VariableCompatTensor::classof(tensor.impl().get()); return VariableCompatTensor::classof(tensor.impl().get());
} }
/** /**
* This class is used by Eager mode for now. It's painful to do this in Eager * This class is used by Eager mode for now. It's painful to do this in Eager
* Mode, the better choice is to use paddle::experimental::Tensor directly. * Mode, the better choice is to use paddle::Tensor directly.
* However, we have a bunch of nested kernel code, and they use * However, we have a bunch of nested kernel code, and they use
* paddle::framework::Variable in inner logic code. So, we have to provide * paddle::framework::Variable in inner logic code. So, we have to provide
* variable in paddle::framework::ExecutionContext to support it. We should * variable in paddle::framework::ExecutionContext to support it. We should
* remove this as soon as we finish our latest Phi Lib, and use * remove this as soon as we finish our latest Phi Lib, and use
* paddle::experimental::Tensor instead. * paddle::Tensor instead.
* *
* Note: Keep this class as clean as possible. * Note: Keep this class as clean as possible.
* This class should only support method declared in * This class should only support method declared in
* paddle::experimental::Tensor with access method of * paddle::Tensor with access method of
* paddle::framework::Variable no more members are acceptable. * paddle::framework::Variable no more members are acceptable.
* **/ * **/
class EagerVariable final { class EagerVariable final {
...@@ -204,8 +204,7 @@ class EagerVariable final { ...@@ -204,8 +204,7 @@ class EagerVariable final {
explicit EagerVariable(const std::string& name) : name_(name) {} explicit EagerVariable(const std::string& name) : name_(name) {}
explicit EagerVariable(const paddle::experimental::Tensor& tensor) explicit EagerVariable(const paddle::Tensor& tensor) : name_(tensor.name()) {
: name_(tensor.name()) {
if (tensor.defined()) { if (tensor.defined()) {
if (tensor.is_dense_tensor()) { if (tensor.is_dense_tensor()) {
ConstructVariableFromTensor<phi::DenseTensor>(tensor); ConstructVariableFromTensor<phi::DenseTensor>(tensor);
...@@ -283,7 +282,7 @@ class EagerVariable final { ...@@ -283,7 +282,7 @@ class EagerVariable final {
} }
template <typename VarType> template <typename VarType>
void ConstructVariableFromTensor(const paddle::experimental::Tensor& tensor) { void ConstructVariableFromTensor(const paddle::Tensor& tensor) {
auto* framework_tensor = var_.GetMutable<VarType>(); auto* framework_tensor = var_.GetMutable<VarType>();
// Construct phi::DenseTensor from egr::EagerVariable // Construct phi::DenseTensor from egr::EagerVariable
auto tensor_dense = std::dynamic_pointer_cast<VarType>(tensor.impl()); auto tensor_dense = std::dynamic_pointer_cast<VarType>(tensor.impl());
...@@ -299,8 +298,7 @@ class EagerVariable final { ...@@ -299,8 +298,7 @@ class EagerVariable final {
} }
template <typename VarType> template <typename VarType>
void ConstructVariableFromCompatTensor( void ConstructVariableFromCompatTensor(const paddle::Tensor& tensor) {
const paddle::experimental::Tensor& tensor) {
auto* framework_holder = var_.GetMutable<VarType>(); auto* framework_holder = var_.GetMutable<VarType>();
// Construct phi::DenseTensor from egr::EagerVariable // Construct phi::DenseTensor from egr::EagerVariable
auto* compat_tensor = auto* compat_tensor =
......
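The comment blocks above spell out why these two compatibility types exist: a handful of operators still consume framework-side values (vector<string>, Variable-held state) that neither DenseTensor nor the sparse tensor types can represent. A tiny sketch built only on the IsVariableCompatTensor helper shown above; the defined() guard is an extra precaution added here, since the helper dereferences impl():

// True when the tensor's impl is the Variable-compatible holder rather than
// a regular phi tensor.
bool HoldsFrameworkOnlyType(const paddle::Tensor& t) {
  return t.defined() && egr::IsVariableCompatTensor(t);
}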
...@@ -40,9 +40,8 @@ class GeneralGrad { ...@@ -40,9 +40,8 @@ class GeneralGrad {
static GeneralGrad& Instance() { return *general_grad_; } static GeneralGrad& Instance() { return *general_grad_; }
// Get inputs's / no_grad_vars's GradNodes and InputMeta Info // Get inputs's / no_grad_vars's GradNodes and InputMeta Info
void GetTargetNodesInfo( void GetTargetNodesInfo(const std::vector<paddle::Tensor>& inputs,
const std::vector<paddle::experimental::Tensor>& inputs, bool is_no_grad_vars) {
bool is_no_grad_vars) {
std::string msg = is_no_grad_vars ? "no_grad_vars" : "inputs"; std::string msg = is_no_grad_vars ? "no_grad_vars" : "inputs";
VLOG(6) << "Running in GetTargetNodesInfo."; VLOG(6) << "Running in GetTargetNodesInfo.";
if (!inputs.empty()) { if (!inputs.empty()) {
...@@ -231,32 +230,29 @@ class GeneralGrad { ...@@ -231,32 +230,29 @@ class GeneralGrad {
(iter->second)->Buffers()[rank_info.first][rank_info.second]; (iter->second)->Buffers()[rank_info.first][rank_info.second];
// save the target result // save the target result
results_map_[input_target_node.first] = results_map_[input_target_node.first] =
std::make_shared<paddle::experimental::Tensor>(target_result); std::make_shared<paddle::Tensor>(target_result);
} }
} }
} // TODO(jiabin): Some check here. } // TODO(jiabin): Some check here.
} }
void SetResultForEnddingNodes( void SetResultForEnddingNodes(
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize> grad_output, grad_output,
GradNodeBase* node) { GradNodeBase* node) {
if (IsEnddingNodes(node)) { if (IsEnddingNodes(node)) {
VLOG(6) << "Set result for endding_nodes_ with grad_output_tensors"; VLOG(6) << "Set result for endding_nodes_ with grad_output_tensors";
results_map_[node] = results_map_[node] = std::make_shared<paddle::Tensor>(grad_output[0][0]);
std::make_shared<paddle::experimental::Tensor>(grad_output[0][0]);
} }
} }
std::shared_ptr<paddle::experimental::Tensor> FetchGradForTensor( std::shared_ptr<paddle::Tensor> FetchGradForTensor(
const paddle::experimental::Tensor& tensor, const paddle::Tensor& tensor, egr::GradNodeBase* target_node) {
egr::GradNodeBase* target_node) { std::shared_ptr<paddle::Tensor> tmp{std::make_shared<paddle::Tensor>()};
std::shared_ptr<paddle::experimental::Tensor> tmp{
std::make_shared<paddle::experimental::Tensor>()};
VLOG(6) VLOG(6)
<< "Running in FetchGradForTensor, prepare FetchGrad Hook for tensor: " << "Running in FetchGradForTensor, prepare FetchGrad Hook for tensor: "
<< tensor.name(); << tensor.name();
auto hook = [tmp](const paddle::experimental::Tensor& t) { auto hook = [tmp](const paddle::Tensor& t) {
auto tmp_grad = tmp.get(); auto tmp_grad = tmp.get();
if (t.defined()) { if (t.defined()) {
VLOG(6) << "Set impl for FetchGrad Hook for tensor: " << t.name(); VLOG(6) << "Set impl for FetchGrad Hook for tensor: " << t.name();
...@@ -264,8 +260,8 @@ class GeneralGrad { ...@@ -264,8 +260,8 @@ class GeneralGrad {
tmp_grad->set_autograd_meta(t.mutable_autograd_meta()); tmp_grad->set_autograd_meta(t.mutable_autograd_meta());
return t; return t;
} else { } else {
VLOG(6) << "Retain NULL paddle::experimental::Tensor in FetchGrad Hook"; VLOG(6) << "Retain NULL paddle::Tensor in FetchGrad Hook";
return paddle::experimental::Tensor(); return paddle::Tensor();
} }
}; };
...@@ -283,8 +279,7 @@ class GeneralGrad { ...@@ -283,8 +279,7 @@ class GeneralGrad {
// backward graph, use grad node's output as inputs' gradients and no need to // backward graph, use grad node's output as inputs' gradients and no need to
// register Hook. Please note that endding node must be GradNodeAccumulation // register Hook. Please note that endding node must be GradNodeAccumulation
// after ModifyBackwardGraph function. // after ModifyBackwardGraph function.
void RegisterFetchGradHook( void RegisterFetchGradHook(const std::vector<paddle::Tensor>& inputs) {
const std::vector<paddle::experimental::Tensor>& inputs) {
VLOG(6) << "Running in RegisterFetchGradHook."; VLOG(6) << "Running in RegisterFetchGradHook.";
if (!inputs.empty()) { if (!inputs.empty()) {
size_t num_inputs = inputs.size(); size_t num_inputs = inputs.size();
...@@ -436,14 +431,14 @@ class GeneralGrad { ...@@ -436,14 +431,14 @@ class GeneralGrad {
} }
} }
std::vector<paddle::experimental::Tensor> GetResults( std::vector<paddle::Tensor> GetResults(
const std::vector<paddle::experimental::Tensor>& inputs, const std::vector<paddle::Tensor>& inputs,
bool allow_unused, bool allow_unused,
bool create_graph) { bool create_graph) {
VLOG(6) << "Running in GetResults"; VLOG(6) << "Running in GetResults";
if (inputs.empty()) return {}; if (inputs.empty()) return {};
std::vector<paddle::experimental::Tensor> results; std::vector<paddle::Tensor> results;
results.reserve(inputs.size()); results.reserve(inputs.size());
for (size_t i = 0; i < inputs.size(); ++i) { for (size_t i = 0; i < inputs.size(); ++i) {
...@@ -582,8 +577,8 @@ class GeneralGrad { ...@@ -582,8 +577,8 @@ class GeneralGrad {
} }
void PreparedForGeneralGrad( void PreparedForGeneralGrad(
const std::vector<paddle::experimental::Tensor>& inputs, const std::vector<paddle::Tensor>& inputs,
const std::vector<paddle::experimental::Tensor>& no_grad_vars, const std::vector<paddle::Tensor>& no_grad_vars,
const std::deque<GradNodeBase*>& orig_queue, const std::deque<GradNodeBase*>& orig_queue,
std::deque<GradNodeBase*>* queue, std::deque<GradNodeBase*>* queue,
const std::unordered_map<GradNodeBase*, const std::unordered_map<GradNodeBase*,
...@@ -645,8 +640,7 @@ class GeneralGrad { ...@@ -645,8 +640,7 @@ class GeneralGrad {
std::unordered_map<GradNodeBase* /* next node */, std::unordered_map<GradNodeBase* /* next node */,
std::unordered_set<GradNodeBase*> /* pre nodes */> std::unordered_set<GradNodeBase*> /* pre nodes */>
depending_nodes_; depending_nodes_;
std::unordered_map<GradNodeBase*, std::unordered_map<GradNodeBase*, std::shared_ptr<paddle::Tensor>>
std::shared_ptr<paddle::experimental::Tensor>>
results_map_; results_map_;
std::vector<std::shared_ptr<GradNodeBase>> copied_grad_nodes_; std::vector<std::shared_ptr<GradNodeBase>> copied_grad_nodes_;
......
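FetchGradForTensor above shows the hook pattern in the new spelling: a callable that receives the incoming gradient as a paddle::Tensor and returns a paddle::Tensor. A standalone sketch of such a hook; the std::function signature is one plausible spelling, and the registration call is framework-internal, so it is omitted here:

#include <functional>

// A pass-through gradient hook: observe the gradient and return it unchanged
// so the original backward result is preserved.
std::function<paddle::Tensor(const paddle::Tensor&)> MakePassThroughHook() {
  return [](const paddle::Tensor& grad) -> paddle::Tensor {
    // Inspect or copy `grad` here if needed.
    return grad;
  };
}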
...@@ -34,8 +34,7 @@ ...@@ -34,8 +34,7 @@
**/ **/
namespace egr { namespace egr {
static void CheckTensor(const paddle::experimental::Tensor& pre, static void CheckTensor(const paddle::Tensor& pre, const paddle::Tensor& post) {
const paddle::experimental::Tensor& post) {
if (!pre.initialized() && post.initialized()) { if (!pre.initialized() && post.initialized()) {
PADDLE_THROW(paddle::platform::errors::PermissionDenied( PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"The tensor in before and after hook are not consistent")); "The tensor in before and after hook are not consistent"));
...@@ -82,7 +81,7 @@ GradNodeBase::MutableOutputMeta() { ...@@ -82,7 +81,7 @@ GradNodeBase::MutableOutputMeta() {
return bwd_out_meta_; return bwd_out_meta_;
} }
void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out, void GradNodeBase::SetGradInMeta(const paddle::Tensor& fwd_out,
size_t slot_rank) { size_t slot_rank) {
VLOG(7) << "Set GradSlotMeta for Grad Inputs"; VLOG(7) << "Set GradSlotMeta for Grad Inputs";
auto* fwd_out_meta = egr::EagerUtils::nullable_autograd_meta(fwd_out); auto* fwd_out_meta = egr::EagerUtils::nullable_autograd_meta(fwd_out);
...@@ -142,9 +141,8 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out, ...@@ -142,9 +141,8 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out,
} }
} }
void GradNodeBase::SetGradInMeta( void GradNodeBase::SetGradInMeta(const std::vector<paddle::Tensor>& fwd_out,
const std::vector<paddle::experimental::Tensor>& fwd_out, size_t slot_rank) {
size_t slot_rank) {
VLOG(7) << "Set GradSlotMeta for Grad Inputs"; VLOG(7) << "Set GradSlotMeta for Grad Inputs";
size_t slot_size = fwd_out.size(); size_t slot_size = fwd_out.size();
PADDLE_ENFORCE_LE( PADDLE_ENFORCE_LE(
...@@ -208,7 +206,7 @@ void GradNodeBase::SetGradInMeta( ...@@ -208,7 +206,7 @@ void GradNodeBase::SetGradInMeta(
} }
} }
void GradNodeBase::SetGradOutMeta(const paddle::experimental::Tensor& fwd_in, void GradNodeBase::SetGradOutMeta(const paddle::Tensor& fwd_in,
size_t slot_rank) { size_t slot_rank) {
auto* fwd_in_meta = egr::EagerUtils::nullable_autograd_meta(fwd_in); auto* fwd_in_meta = egr::EagerUtils::nullable_autograd_meta(fwd_in);
PADDLE_ENFORCE_LE( PADDLE_ENFORCE_LE(
...@@ -265,8 +263,8 @@ void GradNodeBase::SetGradOutMeta(const paddle::experimental::Tensor& fwd_in, ...@@ -265,8 +263,8 @@ void GradNodeBase::SetGradOutMeta(const paddle::experimental::Tensor& fwd_in,
} }
} }
void GradNodeBase::SetGradOutMeta( void GradNodeBase::SetGradOutMeta(const std::vector<paddle::Tensor>& fwd_in,
const std::vector<paddle::experimental::Tensor>& fwd_in, size_t slot_rank) { size_t slot_rank) {
size_t slot_size = fwd_in.size(); size_t slot_size = fwd_in.size();
PADDLE_ENFORCE_LE( PADDLE_ENFORCE_LE(
slot_rank, slot_rank,
...@@ -327,8 +325,7 @@ void GradNodeBase::SetGradOutMeta( ...@@ -327,8 +325,7 @@ void GradNodeBase::SetGradOutMeta(
} }
void GradNodeBase::SetGradOutMeta( void GradNodeBase::SetGradOutMeta(
const std::vector<const paddle::experimental::Tensor*>& fwd_in, const std::vector<const paddle::Tensor*>& fwd_in, size_t slot_rank) {
size_t slot_rank) {
size_t slot_size = fwd_in.size(); size_t slot_size = fwd_in.size();
PADDLE_ENFORCE_LE( PADDLE_ENFORCE_LE(
slot_rank, slot_rank,
...@@ -406,14 +403,12 @@ int64_t GradNodeBase::RegisterGradientHook( ...@@ -406,14 +403,12 @@ int64_t GradNodeBase::RegisterGradientHook(
return next_hook_id_++; return next_hook_id_++;
} }
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
GradNodeBase::ApplyGradientHooks( GradNodeBase::ApplyGradientHooks(
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& tensors) { kSlotSmallVectorSize>& tensors) {
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize> outs(
kSlotSmallVectorSize> tensors.size());
outs(tensors.size());
for (auto& hook_pair : gradient_hooks_) { for (auto& hook_pair : gradient_hooks_) {
size_t slot_id = std::get<0>(hook_pair.second); size_t slot_id = std::get<0>(hook_pair.second);
size_t rank = std::get<1>(hook_pair.second); size_t rank = std::get<1>(hook_pair.second);
...@@ -431,9 +426,9 @@ GradNodeBase::ApplyGradientHooks( ...@@ -431,9 +426,9 @@ GradNodeBase::ApplyGradientHooks(
"than rank size of grad_tensors", "than rank size of grad_tensors",
slot_id)); slot_id));
std::vector<paddle::experimental::Tensor>& slot_out = outs[slot_id]; std::vector<paddle::Tensor>& slot_out = outs[slot_id];
slot_out.resize(tensors[slot_id].size()); slot_out.resize(tensors[slot_id].size());
paddle::experimental::Tensor& out = slot_out[rank]; paddle::Tensor& out = slot_out[rank];
if (!out.defined() || !out.initialized()) { if (!out.defined() || !out.initialized()) {
out = (*hook)(tensors[slot_id][rank]); out = (*hook)(tensors[slot_id][rank]);
} else { } else {
...@@ -460,11 +455,10 @@ GradNodeBase::ApplyGradientHooks( ...@@ -460,11 +455,10 @@ GradNodeBase::ApplyGradientHooks(
} }
void GradNodeBase::HandleComplexGradToRealGrad( void GradNodeBase::HandleComplexGradToRealGrad(
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>*
kSlotSmallVectorSize>* out_grads) { out_grads) {
for (size_t slot_id = 0; slot_id < out_grads->size(); slot_id++) { for (size_t slot_id = 0; slot_id < out_grads->size(); slot_id++) {
const std::vector<paddle::experimental::Tensor>& slot_out_grads = const std::vector<paddle::Tensor>& slot_out_grads = (*out_grads)[slot_id];
(*out_grads)[slot_id];
for (size_t rank_id = 0; rank_id < slot_out_grads.size(); rank_id++) { for (size_t rank_id = 0; rank_id < slot_out_grads.size(); rank_id++) {
const GradSlotMeta& slot_meta = bwd_out_meta_[slot_id][rank_id]; const GradSlotMeta& slot_meta = bwd_out_meta_[slot_id][rank_id];
...@@ -477,7 +471,7 @@ void GradNodeBase::HandleComplexGradToRealGrad( ...@@ -477,7 +471,7 @@ void GradNodeBase::HandleComplexGradToRealGrad(
auto fwd_data_type = paddle::framework::TransToProtoVarType( auto fwd_data_type = paddle::framework::TransToProtoVarType(
slot_meta.GetTensorMeta().dtype); slot_meta.GetTensorMeta().dtype);
const paddle::experimental::Tensor& grad = slot_out_grads[rank_id]; const paddle::Tensor& grad = slot_out_grads[rank_id];
if (paddle::framework::IsComplexType(fwd_data_type)) continue; if (paddle::framework::IsComplexType(fwd_data_type)) continue;
......
...@@ -178,13 +178,13 @@ class GradNodeBase { ...@@ -178,13 +178,13 @@ class GradNodeBase {
* vector of Tensor which contains grads input of current operator * vector of Tensor which contains grads input of current operator
* *
* Note: why we need backward inputs and outputs construct as vector of vector * Note: why we need backward inputs and outputs construct as vector of vector
* of paddle::experimental::Tensor? * of paddle::Tensor?
* Since all of paddle op composite in form of {"Slot name ", vector<Var>}, * Since all of paddle op composite in form of {"Slot name ", vector<Var>},
* so, vector of vector is better choice to fit this format. * so, vector of vector is better choice to fit this format.
* **/ * **/
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>, virtual paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize> kSlotSmallVectorSize>
operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>, operator()(paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false, bool create_graph = false,
bool is_new_grad = false) = 0; bool is_new_grad = false) = 0;
...@@ -216,18 +216,15 @@ class GradNodeBase { ...@@ -216,18 +216,15 @@ class GradNodeBase {
* Set bwd ins and outs info with forward vars * Set bwd ins and outs info with forward vars
* **/ * **/
void SetGradInMeta(const std::vector<paddle::experimental::Tensor>& fwd_out, void SetGradInMeta(const std::vector<paddle::Tensor>& fwd_out,
size_t slot_rank);
void SetGradInMeta(const paddle::experimental::Tensor& fwd_out,
size_t slot_rank); size_t slot_rank);
void SetGradInMeta(const paddle::Tensor& fwd_out, size_t slot_rank);
void SetGradOutMeta(const std::vector<paddle::experimental::Tensor>& fwd_in, void SetGradOutMeta(const std::vector<paddle::Tensor>& fwd_in,
size_t slot_rank); size_t slot_rank);
void SetGradOutMeta( void SetGradOutMeta(const std::vector<const paddle::Tensor*>& fwd_in,
const std::vector<const paddle::experimental::Tensor*>& fwd_in,
size_t slot_rank);
void SetGradOutMeta(const paddle::experimental::Tensor& fwd_in,
size_t slot_rank); size_t slot_rank);
void SetGradOutMeta(const paddle::Tensor& fwd_in, size_t slot_rank);
/** /**
* Default setters for Grad in/out meta this should be used for same special * Default setters for Grad in/out meta this should be used for same special
* Node which will not create by user * Node which will not create by user
...@@ -269,18 +266,16 @@ class GradNodeBase { ...@@ -269,18 +266,16 @@ class GradNodeBase {
gradient_hooks_ = hooks; gradient_hooks_ = hooks;
} }
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize> ApplyGradientHooks(const paddle::small_vector<std::vector<paddle::Tensor>,
ApplyGradientHooks( kSlotSmallVectorSize>& tensors);
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors);
/** /**
* Handle Complex - Real Type Promotion * Handle Complex - Real Type Promotion
* **/ * **/
void HandleComplexGradToRealGrad( void HandleComplexGradToRealGrad(
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>*
kSlotSmallVectorSize>* out_grads); out_grads);
bool NeedComplexToRealConversion() { return need_complex_to_real_; } bool NeedComplexToRealConversion() { return need_complex_to_real_; }
virtual std::string name() { return "GradNodeBase"; } virtual std::string name() { return "GradNodeBase"; }
......
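The note above is the reason every grad functor traffics in a vector of vectors: the outer index is the op's named slot, the inner index is the position inside that slot. A minimal sketch of building such a container (kSlotSmallVectorSize is assumed to be the egr-namespace constant these headers use; slot names are placeholders):

// Two input slots; slot 0 holds a single tensor, slot 1 holds a whole vector.
paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
MakeSlottedGrads(const paddle::Tensor& x,
                 const std::vector<paddle::Tensor>& ys) {
  paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
      grads(2);      // one outer entry per slot, e.g. {"X", "Y"}
  grads[0] = {x};    // slot "X": one tensor
  grads[1] = ys;     // slot "Y": several tensors
  return grads;
}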
...@@ -29,11 +29,10 @@ void GradTensorHolder::SetBufferSlotRankZeros(size_t slot_id, size_t rank) { ...@@ -29,11 +29,10 @@ void GradTensorHolder::SetBufferSlotRankZeros(size_t slot_id, size_t rank) {
paddle::experimental::zeros_like(buffer_[slot_id][rank]); paddle::experimental::zeros_like(buffer_[slot_id][rank]);
} }
void GradTensorHolder::CopyValueFromTensor( void GradTensorHolder::CopyValueFromTensor(size_t slot_id,
size_t slot_id, size_t rank,
size_t rank, const paddle::Tensor& t,
const paddle::experimental::Tensor& t, bool fill_one) {
bool fill_one) {
// TODO(jiabin): We need to deal with empty input_buffer with slot size not // TODO(jiabin): We need to deal with empty input_buffer with slot size not
// empty; // empty;
PADDLE_ENFORCE(slot_id < buffer_.size(), PADDLE_ENFORCE(slot_id < buffer_.size(),
...@@ -56,7 +55,7 @@ void GradTensorHolder::CopyValueFromTensor( ...@@ -56,7 +55,7 @@ void GradTensorHolder::CopyValueFromTensor(
buffer_[slot_id].size(), buffer_[slot_id].size(),
rank)); rank));
if (!fill_one) { if (!fill_one) {
paddle::experimental::Tensor& buffer_tensor = buffer_[slot_id][rank]; paddle::Tensor& buffer_tensor = buffer_[slot_id][rank];
if ((!buffer_tensor.defined() || !buffer_tensor.initialized())) { if ((!buffer_tensor.defined() || !buffer_tensor.initialized())) {
// Perform deep copy here // Perform deep copy here
buffer_tensor.copy_(t, t.place(), false); buffer_tensor.copy_(t, t.place(), false);
...@@ -97,7 +96,7 @@ void GradTensorHolder::CopyValueFromTensor( ...@@ -97,7 +96,7 @@ void GradTensorHolder::CopyValueFromTensor(
void GradTensorHolder::add(size_t slot_id, void GradTensorHolder::add(size_t slot_id,
size_t rank, size_t rank,
const paddle::experimental::Tensor& t, const paddle::Tensor& t,
bool create_graph) { bool create_graph) {
if (!t.initialized()) { if (!t.initialized()) {
VLOG(3) << "No need to do accumulate for uninitialized t."; VLOG(3) << "No need to do accumulate for uninitialized t.";
...@@ -122,7 +121,7 @@ void GradTensorHolder::add(size_t slot_id, ...@@ -122,7 +121,7 @@ void GradTensorHolder::add(size_t slot_id,
buffer_[slot_id].size(), buffer_[slot_id].size(),
rank)); rank));
paddle::experimental::Tensor& buffer_tensor = buffer_[slot_id][rank]; paddle::Tensor& buffer_tensor = buffer_[slot_id][rank];
// TODO(jiabin): Code bellow is ugly to divide which inner var we used, // TODO(jiabin): Code bellow is ugly to divide which inner var we used,
// remove framework::Variable // remove framework::Variable
// related code later. // related code later.
...@@ -150,35 +149,33 @@ void GradTensorHolder::add(size_t slot_id, ...@@ -150,35 +149,33 @@ void GradTensorHolder::add(size_t slot_id,
if (create_graph || t.is_custom_device()) { if (create_graph || t.is_custom_device()) {
buffer_tensor = add_ad_func(t, buffer_tensor); buffer_tensor = add_ad_func(t, buffer_tensor);
} else { } else {
paddle::imperative::TensorAdd<paddle::experimental::Tensor>( paddle::imperative::TensorAdd<paddle::Tensor>(t, &buffer_tensor);
t, &buffer_tensor);
} }
} else { } else {
// TODO(jiabin): Support Other TensorBase later // TODO(jiabin): Support Other TensorBase later
// TODO(zhanlve): Replace SelectedRowsAddTensor with // TODO(zhanlve): Replace SelectedRowsAddTensor with
// add_dygraph_function once it's supported // add_dygraph_function once it's supported
paddle::experimental::Tensor new_buffer( paddle::Tensor new_buffer(std::make_shared<phi::DenseTensor>(),
std::make_shared<phi::DenseTensor>(), "tmp_accumulator"); "tmp_accumulator");
paddle::imperative::SelectedRowsAddTensor( paddle::imperative::SelectedRowsAddTensor(
buffer_tensor, t, &new_buffer); buffer_tensor, t, &new_buffer);
buffer_tensor.set_impl(new_buffer.impl()); buffer_tensor.set_impl(new_buffer.impl());
} }
} else if (t.is_sparse_coo_tensor()) { } else if (t.is_sparse_coo_tensor()) {
auto t_sparse = std::dynamic_pointer_cast<phi::SparseCooTensor>(t.impl()); auto t_sparse = std::dynamic_pointer_cast<phi::SparseCooTensor>(t.impl());
paddle::experimental::Tensor t_values( paddle::Tensor t_values(
std::make_shared<phi::DenseTensor>(t_sparse->non_zero_elements())); std::make_shared<phi::DenseTensor>(t_sparse->non_zero_elements()));
// In fact, the gradient of SparseTensor is still a SparseTensor // In fact, the gradient of SparseTensor is still a SparseTensor
if (buffer_tensor.is_sparse_coo_tensor()) { if (buffer_tensor.is_sparse_coo_tensor()) {
auto buffer_sparse = std::dynamic_pointer_cast<phi::SparseCooTensor>( auto buffer_sparse = std::dynamic_pointer_cast<phi::SparseCooTensor>(
buffer_tensor.impl()); buffer_tensor.impl());
paddle::experimental::Tensor buffer_values( paddle::Tensor buffer_values(std::make_shared<phi::DenseTensor>(
std::make_shared<phi::DenseTensor>( buffer_sparse->non_zero_elements()));
buffer_sparse->non_zero_elements()));
if (create_graph || t.is_custom_device()) { if (create_graph || t.is_custom_device()) {
buffer_values = add_ad_func(t_values, buffer_values); buffer_values = add_ad_func(t_values, buffer_values);
} else { } else {
paddle::imperative::TensorAdd<paddle::experimental::Tensor>( paddle::imperative::TensorAdd<paddle::Tensor>(t_values,
t_values, &buffer_values); &buffer_values);
} }
} }
} else { } else {
...@@ -189,8 +186,8 @@ void GradTensorHolder::add(size_t slot_id, ...@@ -189,8 +186,8 @@ void GradTensorHolder::add(size_t slot_id,
paddle::imperative::SelectedRowsAddToTensor(t, &buffer_tensor); paddle::imperative::SelectedRowsAddToTensor(t, &buffer_tensor);
} else { } else {
buffer_tensor = buffer_tensor =
std::move(*paddle::imperative::SelectedRowsMerge< std::move(*paddle::imperative::SelectedRowsMerge<paddle::Tensor>(
paddle::experimental::Tensor>(t, buffer_tensor)); t, buffer_tensor));
} }
} }
} }
......
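GradTensorHolder::add above either copies the incoming gradient into buffer_[slot_id][rank] when that cell is still empty, or accumulates into it otherwise (via TensorAdd, add_ad_func, or the SelectedRows paths). The following is a minimal standard-C++ sketch of that accumulate-or-copy behaviour under the assumption that a tensor is just a vector of floats; ToyGradHolder and ToyTensor are hypothetical names, and none of Paddle's type dispatch is reproduced.

// Sketch only: a grad buffer that copies on first write and accumulates after.
#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

using ToyTensor = std::vector<float>;

class ToyGradHolder {
 public:
  explicit ToyGradHolder(std::size_t slots) : buffer_(slots) {}

  void add(std::size_t slot, std::size_t rank, const ToyTensor& t) {
    assert(slot < buffer_.size());
    if (buffer_[slot].size() <= rank) buffer_[slot].resize(rank + 1);
    ToyTensor& dst = buffer_[slot][rank];
    if (dst.empty()) {
      dst = t;  // first gradient for this slot/rank: plain copy
    } else {
      assert(dst.size() == t.size());
      for (std::size_t i = 0; i < t.size(); ++i) dst[i] += t[i];  // accumulate
    }
  }

  const ToyTensor& at(std::size_t slot, std::size_t rank) const {
    return buffer_[slot][rank];
  }

 private:
  std::vector<std::vector<ToyTensor>> buffer_;  // slot -> rank -> tensor
};

int main() {
  ToyGradHolder holder(1);
  holder.add(0, 0, {1.f, 2.f});
  holder.add(0, 0, {3.f, 4.f});  // accumulates into the same cell
  std::cout << holder.at(0, 0)[0] << " " << holder.at(0, 0)[1] << "\n";  // 4 6
  return 0;
}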
...@@ -39,9 +39,8 @@ class GradTensorHolder { ...@@ -39,9 +39,8 @@ class GradTensorHolder {
GradTensorHolder(const GradTensorHolder& other) = default; GradTensorHolder(const GradTensorHolder& other) = default;
explicit GradTensorHolder( explicit GradTensorHolder(paddle::small_vector<std::vector<paddle::Tensor>,
paddle::small_vector<std::vector<paddle::experimental::Tensor>, kSlotSmallVectorSize>&& inputs)
kSlotSmallVectorSize>&& inputs)
: buffer_(std::move(inputs)) {} : buffer_(std::move(inputs)) {}
GradTensorHolder& operator=(const GradTensorHolder& other) = default; GradTensorHolder& operator=(const GradTensorHolder& other) = default;
...@@ -49,20 +48,18 @@ class GradTensorHolder { ...@@ -49,20 +48,18 @@ class GradTensorHolder {
// Create new tensor and copy tensor->impl // Create new tensor and copy tensor->impl
void add(size_t slot_id, void add(size_t slot_id,
size_t rank, size_t rank,
const paddle::experimental::Tensor& t, const paddle::Tensor& t,
bool create_graph = false); bool create_graph = false);
void CopyValueFromTensor(size_t slot_id, void CopyValueFromTensor(size_t slot_id,
size_t rank, size_t rank,
const paddle::experimental::Tensor& t, const paddle::Tensor& t,
bool fill_one = false); bool fill_one = false);
const std::vector<paddle::experimental::Tensor>& operator[]( const std::vector<paddle::Tensor>& operator[](const size_t& pos) {
const size_t& pos) {
return buffer_[pos]; return buffer_[pos];
} }
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>&
kSlotSmallVectorSize>&
Buffers() { Buffers() {
return buffer_; return buffer_;
} }
...@@ -70,8 +67,7 @@ class GradTensorHolder { ...@@ -70,8 +67,7 @@ class GradTensorHolder {
void SetBufferSlotRankZeros(size_t slot_id, size_t rank); void SetBufferSlotRankZeros(size_t slot_id, size_t rank);
private: private:
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
buffer_; buffer_;
}; };
......
...@@ -26,8 +26,7 @@ namespace egr { ...@@ -26,8 +26,7 @@ namespace egr {
class TensorHook { class TensorHook {
public: public:
virtual ~TensorHook() = default; virtual ~TensorHook() = default;
virtual paddle::experimental::Tensor operator()( virtual paddle::Tensor operator()(const paddle::Tensor& var) = 0;
const paddle::experimental::Tensor& var) = 0;
}; };
class VoidHook { class VoidHook {
...@@ -38,19 +37,16 @@ class VoidHook { ...@@ -38,19 +37,16 @@ class VoidHook {
class CppTensorHook : public TensorHook { class CppTensorHook : public TensorHook {
public: public:
explicit CppTensorHook(const std::function<paddle::experimental::Tensor( explicit CppTensorHook(
const paddle::experimental::Tensor&)>& fn) const std::function<paddle::Tensor(const paddle::Tensor&)>& fn)
: fn_(std::move(fn)) {} : fn_(std::move(fn)) {}
paddle::experimental::Tensor operator()( paddle::Tensor operator()(const paddle::Tensor& var) override {
const paddle::experimental::Tensor& var) override {
return fn_(var); return fn_(var);
} }
private: private:
std::function<paddle::experimental::Tensor( std::function<paddle::Tensor(const paddle::Tensor&)> fn_;
const paddle::experimental::Tensor&)>
fn_;
}; };
class CppVoidHook : public VoidHook { class CppVoidHook : public VoidHook {
...@@ -76,14 +72,14 @@ class PackHookBase { ...@@ -76,14 +72,14 @@ class PackHookBase {
public: public:
virtual ~PackHookBase() = default; virtual ~PackHookBase() = default;
virtual std::shared_ptr<PyObjectHolderBase> operator()( virtual std::shared_ptr<PyObjectHolderBase> operator()(
const paddle::experimental::Tensor& tensor) = 0; const paddle::Tensor& tensor) = 0;
virtual void* operator()(void* py_tensor) = 0; virtual void* operator()(void* py_tensor) = 0;
}; };
class UnPackHookBase { class UnPackHookBase {
public: public:
virtual ~UnPackHookBase() = default; virtual ~UnPackHookBase() = default;
virtual paddle::experimental::Tensor operator()( virtual paddle::Tensor operator()(
std::shared_ptr<PyObjectHolderBase> packed_value) = 0; std::shared_ptr<PyObjectHolderBase> packed_value) = 0;
virtual void* operator()(void* packed_value, void* other) = 0; virtual void* operator()(void* packed_value, void* other) = 0;
}; };
......
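The hooks header above defines TensorHook as an abstract call interface and CppTensorHook as the variant that simply wraps a std::function<paddle::Tensor(const paddle::Tensor&)>. A minimal standard-C++ sketch of that split follows; ToyHook, ToyCppHook, and ToyTensor are hypothetical stand-ins, not Paddle classes.

// Sketch only: an abstract hook plus a std::function-backed implementation.
#include <functional>
#include <iostream>
#include <memory>

struct ToyTensor {
  float value = 0.f;
};

class ToyHook {
 public:
  virtual ~ToyHook() = default;
  virtual ToyTensor operator()(const ToyTensor& t) = 0;
};

class ToyCppHook : public ToyHook {
 public:
  explicit ToyCppHook(std::function<ToyTensor(const ToyTensor&)> fn)
      : fn_(std::move(fn)) {}
  ToyTensor operator()(const ToyTensor& t) override { return fn_(t); }

 private:
  std::function<ToyTensor(const ToyTensor&)> fn_;
};

int main() {
  // Register a hook that doubles the incoming gradient.
  std::unique_ptr<ToyHook> hook = std::make_unique<ToyCppHook>(
      [](const ToyTensor& t) { return ToyTensor{t.value * 2.f}; });
  ToyTensor grad{3.f};
  std::cout << (*hook)(grad).value << "\n";  // prints 6
  return 0;
}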
...@@ -103,7 +103,7 @@ void CheckTensorHasNanOrInf(const std::string& api_name, ...@@ -103,7 +103,7 @@ void CheckTensorHasNanOrInf(const std::string& api_name,
void CheckTensorHasNanOrInf( void CheckTensorHasNanOrInf(
const std::string& api_name, const std::string& api_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::Tensor>,
egr::kSlotSmallVectorSize>& tensors) { egr::kSlotSmallVectorSize>& tensors) {
for (auto& tensor_vector : tensors) { for (auto& tensor_vector : tensors) {
CheckTensorHasNanOrInf(api_name, tensor_vector); CheckTensorHasNanOrInf(api_name, tensor_vector);
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
namespace egr { namespace egr {
using paddle::experimental::Tensor; using paddle::Tensor;
using TupleOfTwoTensors = std::tuple<Tensor, Tensor>; using TupleOfTwoTensors = std::tuple<Tensor, Tensor>;
using TupleOfThreeTensors = std::tuple<Tensor, Tensor, Tensor>; using TupleOfThreeTensors = std::tuple<Tensor, Tensor, Tensor>;
using TupleOfFourTensors = std::tuple<Tensor, Tensor, Tensor, Tensor>; using TupleOfFourTensors = std::tuple<Tensor, Tensor, Tensor, Tensor>;
...@@ -59,7 +59,7 @@ void CheckTensorHasNanOrInf(const std::string& api_name, ...@@ -59,7 +59,7 @@ void CheckTensorHasNanOrInf(const std::string& api_name,
void CheckTensorHasNanOrInf( void CheckTensorHasNanOrInf(
const std::string& api_name, const std::string& api_name,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>, const paddle::small_vector<std::vector<paddle::Tensor>,
egr::kSlotSmallVectorSize>& tensors); egr::kSlotSmallVectorSize>& tensors);
template <typename TupleT, size_t N, size_t Last> template <typename TupleT, size_t N, size_t Last>
......
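CheckTensorHasNanOrInf above is overloaded for the slot-major small_vector<vector<Tensor>> container and simply walks every tensor in every slot. A minimal standard-C++ sketch of that kind of nested non-finite check is shown below, assuming a toy tensor that is just a vector of floats; HasNanOrInf and ToyTensor are hypothetical names.

// Sketch only: scan a slot-major container for NaN or Inf values.
#include <cmath>
#include <iostream>
#include <vector>

using ToyTensor = std::vector<float>;

bool HasNanOrInf(const std::vector<std::vector<ToyTensor>>& slots) {
  for (const auto& slot : slots) {
    for (const auto& tensor : slot) {
      for (float v : tensor) {
        if (!std::isfinite(v)) return true;  // catches both NaN and +/-Inf
      }
    }
  }
  return false;
}

int main() {
  std::vector<std::vector<ToyTensor>> ok = {{{1.f, 2.f}}, {{3.f}}};
  std::vector<std::vector<ToyTensor>> bad = {{{1.f, NAN}}};
  std::cout << HasNanOrInf(ok) << " " << HasNanOrInf(bad) << "\n";  // 0 1
  return 0;
}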
...@@ -32,18 +32,16 @@ GradNodePyLayer::~GradNodePyLayer() { ...@@ -32,18 +32,16 @@ GradNodePyLayer::~GradNodePyLayer() {
Py_XDECREF(ctx_); Py_XDECREF(ctx_);
} }
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
GradNodePyLayer::operator()( GradNodePyLayer::operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph, bool create_graph,
bool is_new_grad) { bool is_new_grad) {
pybind11::gil_scoped_acquire gil; pybind11::gil_scoped_acquire gil;
VLOG(3) << "Running Eager Backward Node: " << name(); VLOG(3) << "Running Eager Backward Node: " << name();
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
hooked_grads = GradNodePyLayer::ApplyGradientHooks(grads); hooked_grads = GradNodePyLayer::ApplyGradientHooks(grads);
paddle::pybind::PyLayerObject* ctx = paddle::pybind::PyLayerObject* ctx =
...@@ -64,7 +62,7 @@ GradNodePyLayer::operator()( ...@@ -64,7 +62,7 @@ GradNodePyLayer::operator()(
PyObject* pylist = PyList_New((Py_ssize_t)grads[i].size()); PyObject* pylist = PyList_New((Py_ssize_t)grads[i].size());
for (size_t j = 0; j < grads[i].size(); j++) { for (size_t j = 0; j < grads[i].size(); j++) {
if (ctx->materialize_grads && !grads[i][j].initialized()) { if (ctx->materialize_grads && !grads[i][j].initialized()) {
paddle::experimental::Tensor tensor_tmp; paddle::Tensor tensor_tmp;
auto dense_tensor = std::make_shared<phi::DenseTensor>(); auto dense_tensor = std::make_shared<phi::DenseTensor>();
dense_tensor->set_meta(forward_outputs_meta_[i][j]); dense_tensor->set_meta(forward_outputs_meta_[i][j]);
tensor_tmp.set_impl(dense_tensor); tensor_tmp.set_impl(dense_tensor);
...@@ -84,7 +82,7 @@ GradNodePyLayer::operator()( ...@@ -84,7 +82,7 @@ GradNodePyLayer::operator()(
PyTuple_SET_ITEM(backward_args, i, pylist); PyTuple_SET_ITEM(backward_args, i, pylist);
} else { } else {
if (ctx->materialize_grads && !grads[i][0].initialized()) { if (ctx->materialize_grads && !grads[i][0].initialized()) {
paddle::experimental::Tensor tensor_tmp; paddle::Tensor tensor_tmp;
auto dense_tensor = std::make_shared<phi::DenseTensor>(); auto dense_tensor = std::make_shared<phi::DenseTensor>();
dense_tensor->set_meta(forward_outputs_meta_[i][0]); dense_tensor->set_meta(forward_outputs_meta_[i][0]);
tensor_tmp.set_impl(dense_tensor); tensor_tmp.set_impl(dense_tensor);
...@@ -139,8 +137,7 @@ GradNodePyLayer::operator()( ...@@ -139,8 +137,7 @@ GradNodePyLayer::operator()(
outputs_size)); outputs_size));
} }
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
grad_out; grad_out;
grad_out.reserve(ctx->forward_input_tensor_is_duplicable.size()); grad_out.reserve(ctx->forward_input_tensor_is_duplicable.size());
for (size_t i = 0; i < ctx->forward_input_tensor_is_duplicable.size(); i++) { for (size_t i = 0; i < ctx->forward_input_tensor_is_duplicable.size(); i++) {
......
...@@ -48,9 +48,9 @@ class GradNodePyLayer : public GradNodeBase { ...@@ -48,9 +48,9 @@ class GradNodePyLayer : public GradNodeBase {
~GradNodePyLayer() override; ~GradNodePyLayer() override;
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>, virtual paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize> kSlotSmallVectorSize>
operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>, operator()(paddle::small_vector<std::vector<paddle::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false, bool create_graph = false,
bool is_new_grad = false) override; bool is_new_grad = false) override;
...@@ -60,8 +60,7 @@ class GradNodePyLayer : public GradNodeBase { ...@@ -60,8 +60,7 @@ class GradNodePyLayer : public GradNodeBase {
std::string name() override { return name_; } std::string name() override { return name_; }
void SaveForwardOutputsMeta( void SaveForwardOutputsMeta(
const std::vector<std::vector<paddle::experimental::Tensor*>>& const std::vector<std::vector<paddle::Tensor*>>& outputs_tensor) {
outputs_tensor) {
forward_outputs_meta_.resize(outputs_tensor.size()); forward_outputs_meta_.resize(outputs_tensor.size());
forward_outputs_place_.resize(outputs_tensor.size()); forward_outputs_place_.resize(outputs_tensor.size());
for (size_t i = 0; i < outputs_tensor.size(); i++) { for (size_t i = 0; i < outputs_tensor.size(); i++) {
......
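In the PyLayer backward above, when ctx->materialize_grads is set and an incoming grad slot is uninitialized, a placeholder tensor is built from the forward output's saved meta and filled with zeros before calling into Python. A standard-C++ sketch of that materialize-if-needed step follows; MaterializeIfNeeded, ToyMeta, and ToyTensor are hypothetical names and the real DenseTensor/full(0.0) path is not reproduced.

// Sketch only: build a zero-filled placeholder grad from saved metadata.
#include <cstddef>
#include <iostream>
#include <optional>
#include <vector>

struct ToyMeta {
  std::vector<long> shape;
};

using ToyTensor = std::vector<float>;

ToyTensor MaterializeIfNeeded(const std::optional<ToyTensor>& grad,
                              const ToyMeta& saved_meta) {
  if (grad) return *grad;  // already initialized: pass it through
  long numel = 1;
  for (long d : saved_meta.shape) numel *= d;
  return ToyTensor(static_cast<std::size_t>(numel), 0.f);  // zero placeholder
}

int main() {
  ToyMeta meta{{2, 3}};
  ToyTensor filled = MaterializeIfNeeded(std::nullopt, meta);
  std::cout << filled.size() << "\n";  // 6
  return 0;
}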
...@@ -37,7 +37,7 @@ namespace egr { ...@@ -37,7 +37,7 @@ namespace egr {
class TensorWrapper { class TensorWrapper {
public: public:
TensorWrapper() = default; TensorWrapper() = default;
explicit TensorWrapper(const paddle::experimental::Tensor& tensor, explicit TensorWrapper(const paddle::Tensor& tensor,
bool no_need_buffer = false) { bool no_need_buffer = false) {
// set inplace_version_snapshot_ according to tensor's current inplace // set inplace_version_snapshot_ according to tensor's current inplace
// version. // version.
...@@ -133,12 +133,12 @@ class TensorWrapper { ...@@ -133,12 +133,12 @@ class TensorWrapper {
} }
#endif #endif
paddle::experimental::Tensor recover() { paddle::Tensor recover() {
VLOG(6) << "Recover tensor: " << intermidiate_tensor_.name() VLOG(6) << "Recover tensor: " << intermidiate_tensor_.name()
<< " for wrapper"; << " for wrapper";
if (!intermidiate_tensor_.defined()) { if (!intermidiate_tensor_.defined()) {
VLOG(6) << "Return NULL tensor Here. "; VLOG(6) << "Return NULL tensor Here. ";
return paddle::experimental::Tensor(); return paddle::Tensor();
} }
#ifndef PADDLE_NO_PYTHON #ifndef PADDLE_NO_PYTHON
if (packed_value_ && unpack_hook_) { if (packed_value_ && unpack_hook_) {
...@@ -154,7 +154,7 @@ class TensorWrapper { ...@@ -154,7 +154,7 @@ class TensorWrapper {
} }
#endif #endif
paddle::experimental::Tensor recovered_tensor = intermidiate_tensor_; paddle::Tensor recovered_tensor = intermidiate_tensor_;
std::shared_ptr<GradNodeBase> new_grad_node = weak_grad_node_.lock(); std::shared_ptr<GradNodeBase> new_grad_node = weak_grad_node_.lock();
if (new_grad_node) { if (new_grad_node) {
...@@ -178,9 +178,7 @@ class TensorWrapper { ...@@ -178,9 +178,7 @@ class TensorWrapper {
return recovered_tensor; return recovered_tensor;
} }
paddle::experimental::Tensor get_intermidiate_tensor() { paddle::Tensor get_intermidiate_tensor() { return intermidiate_tensor_; }
return intermidiate_tensor_;
}
void clear() { intermidiate_tensor_.reset(); } void clear() { intermidiate_tensor_.reset(); }
...@@ -223,7 +221,7 @@ class TensorWrapper { ...@@ -223,7 +221,7 @@ class TensorWrapper {
private: private:
bool no_need_buffer_ = false; bool no_need_buffer_ = false;
paddle::experimental::Tensor intermidiate_tensor_; paddle::Tensor intermidiate_tensor_;
std::weak_ptr<egr::GradNodeBase> weak_grad_node_; std::weak_ptr<egr::GradNodeBase> weak_grad_node_;
uint32_t inplace_version_snapshot_ = 0; uint32_t inplace_version_snapshot_ = 0;
#ifndef PADDLE_NO_PYTHON #ifndef PADDLE_NO_PYTHON
......
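TensorWrapper above snapshots a forward tensor so the backward pass can recover() it later, and the no_need_buffer flag lets it keep only metadata when the gradient formula never reads the data. A minimal standard-C++ sketch of that keep-or-drop-the-buffer idea follows; ToyWrapper and ToyTensor are hypothetical stand-ins, and the inplace-version and grad-node bookkeeping of the real class are omitted.

// Sketch only: snapshot a tensor, optionally releasing its data buffer.
#include <iostream>
#include <memory>
#include <vector>

struct ToyTensor {
  std::vector<long> shape;
  std::shared_ptr<std::vector<float>> data;  // may be dropped
};

class ToyWrapper {
 public:
  ToyWrapper() = default;
  explicit ToyWrapper(const ToyTensor& t, bool no_need_buffer = false)
      : saved_(t) {
    if (no_need_buffer) saved_.data.reset();  // keep metadata, drop the buffer
  }

  ToyTensor recover() const { return saved_; }

 private:
  ToyTensor saved_;
};

int main() {
  ToyTensor t{{2, 2}, std::make_shared<std::vector<float>>(4, 1.f)};
  ToyWrapper full(t);             // keeps the data
  ToyWrapper meta_only(t, true);  // shape only, buffer released
  std::cout << (full.recover().data != nullptr) << " "
            << (meta_only.recover().data != nullptr) << "\n";  // 1 0
  return 0;
}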
...@@ -39,7 +39,7 @@ TEST(AccumulationNode, SelectedRowsAddToTensor) { ...@@ -39,7 +39,7 @@ TEST(AccumulationNode, SelectedRowsAddToTensor) {
sr0->mutable_value()->Resize(phi::make_ddim({1, 1})); sr0->mutable_value()->Resize(phi::make_ddim({1, 1}));
sr0->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] = sr0->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] =
static_cast<float>(10.0f); static_cast<float>(10.0f);
paddle::experimental::Tensor et0 = paddle::experimental::Tensor(sr0); paddle::Tensor et0 = paddle::Tensor(sr0);
std::shared_ptr<phi::DenseTensor> dt1 = std::make_shared<phi::DenseTensor>( std::shared_ptr<phi::DenseTensor> dt1 = std::make_shared<phi::DenseTensor>(
std::make_unique<paddle::experimental::DefaultAllocator>( std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace()) paddle::platform::CPUPlace())
...@@ -47,15 +47,14 @@ TEST(AccumulationNode, SelectedRowsAddToTensor) { ...@@ -47,15 +47,14 @@ TEST(AccumulationNode, SelectedRowsAddToTensor) {
meta); meta);
dt1->mutable_data<float>(paddle::platform::CPUPlace())[0] = dt1->mutable_data<float>(paddle::platform::CPUPlace())[0] =
static_cast<float>(20.0f); static_cast<float>(20.0f);
paddle::experimental::Tensor et1 = paddle::experimental::Tensor(dt1); paddle::Tensor et1 = paddle::Tensor(dt1);
std::shared_ptr<phi::DenseTensor> input_dt = std::shared_ptr<phi::DenseTensor> input_dt =
std::make_shared<phi::DenseTensor>( std::make_shared<phi::DenseTensor>(
std::make_unique<paddle::experimental::DefaultAllocator>( std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace()) paddle::platform::CPUPlace())
.get(), .get(),
meta); meta);
paddle::experimental::Tensor input_et = paddle::Tensor input_et = paddle::Tensor(input_dt);
paddle::experimental::Tensor(input_dt);
auto grad_meta = EagerUtils::autograd_meta(&input_et); auto grad_meta = EagerUtils::autograd_meta(&input_et);
// Initialize Grad Tensor // Initialize Grad Tensor
std::shared_ptr<phi::SelectedRows> grad_dt = std::shared_ptr<phi::SelectedRows> grad_dt =
...@@ -69,19 +68,17 @@ TEST(AccumulationNode, SelectedRowsAddToTensor) { ...@@ -69,19 +68,17 @@ TEST(AccumulationNode, SelectedRowsAddToTensor) {
grad_meta->SetGradNode(node); grad_meta->SetGradNode(node);
grad_meta->SetStopGradient(false); grad_meta->SetStopGradient(false);
// operator() // operator()
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
et0_vec = {{et0}}; et0_vec = {{et0}};
paddle::experimental::Tensor ret_et0 = node->operator()(et0_vec)[0][0]; paddle::Tensor ret_et0 = node->operator()(et0_vec)[0][0];
auto* ret_et0_ptr = auto* ret_et0_ptr =
std::dynamic_pointer_cast<phi::SelectedRows>(ret_et0.impl()) std::dynamic_pointer_cast<phi::SelectedRows>(ret_et0.impl())
->value() ->value()
.data<float>(); .data<float>();
CHECK_EQ(ret_et0_ptr[0], static_cast<float>(10.0f)); CHECK_EQ(ret_et0_ptr[0], static_cast<float>(10.0f));
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
et1_vec = {{et1}}; et1_vec = {{et1}};
paddle::experimental::Tensor ret_et1 = node->operator()(et1_vec)[0][0]; paddle::Tensor ret_et1 = node->operator()(et1_vec)[0][0];
auto* ret_et1_ptr = auto* ret_et1_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(ret_et1.impl()) std::dynamic_pointer_cast<phi::DenseTensor>(ret_et1.impl())
->data<float>(); ->data<float>();
...@@ -91,7 +88,7 @@ TEST(AccumulationNode, SelectedRowsAddToTensor) { ...@@ -91,7 +88,7 @@ TEST(AccumulationNode, SelectedRowsAddToTensor) {
->value() ->value()
.data<float>()[0], .data<float>()[0],
static_cast<float>(10.0f)); static_cast<float>(10.0f));
paddle::experimental::Tensor* grad = EagerUtils::mutable_grad(input_et); paddle::Tensor* grad = EagerUtils::mutable_grad(input_et);
auto* grad_ptr = auto* grad_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(grad->impl())->data<float>(); std::dynamic_pointer_cast<phi::DenseTensor>(grad->impl())->data<float>();
CHECK_EQ(grad_ptr[0], static_cast<float>(30.0f)); CHECK_EQ(grad_ptr[0], static_cast<float>(30.0f));
...@@ -107,21 +104,20 @@ TEST(AccumulationNode, SelectedRowsMerge) { ...@@ -107,21 +104,20 @@ TEST(AccumulationNode, SelectedRowsMerge) {
sr0->mutable_value()->Resize(phi::make_ddim({1, 1})); sr0->mutable_value()->Resize(phi::make_ddim({1, 1}));
sr0->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] = sr0->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] =
static_cast<float>(10.0f); static_cast<float>(10.0f);
paddle::experimental::Tensor et0 = paddle::experimental::Tensor(sr0); paddle::Tensor et0 = paddle::Tensor(sr0);
std::shared_ptr<phi::SelectedRows> sr1 = std::shared_ptr<phi::SelectedRows> sr1 =
std::make_shared<phi::SelectedRows>(rows, 1); std::make_shared<phi::SelectedRows>(rows, 1);
sr1->mutable_value()->Resize(phi::make_ddim({1, 1})); sr1->mutable_value()->Resize(phi::make_ddim({1, 1}));
sr1->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] = sr1->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] =
static_cast<float>(20.0f); static_cast<float>(20.0f);
paddle::experimental::Tensor et1 = paddle::experimental::Tensor(sr1); paddle::Tensor et1 = paddle::Tensor(sr1);
std::shared_ptr<phi::DenseTensor> input_dt = std::shared_ptr<phi::DenseTensor> input_dt =
std::make_shared<phi::DenseTensor>( std::make_shared<phi::DenseTensor>(
std::make_unique<paddle::experimental::DefaultAllocator>( std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace()) paddle::platform::CPUPlace())
.get(), .get(),
meta); meta);
paddle::experimental::Tensor input_et = paddle::Tensor input_et = paddle::Tensor(input_dt);
paddle::experimental::Tensor(input_dt);
auto grad_meta = EagerUtils::autograd_meta(&input_et); auto grad_meta = EagerUtils::autograd_meta(&input_et);
// Initialize Grad Tensor // Initialize Grad Tensor
std::shared_ptr<phi::SelectedRows> grad_dt = std::shared_ptr<phi::SelectedRows> grad_dt =
...@@ -135,19 +131,17 @@ TEST(AccumulationNode, SelectedRowsMerge) { ...@@ -135,19 +131,17 @@ TEST(AccumulationNode, SelectedRowsMerge) {
grad_meta->SetGradNode(node); grad_meta->SetGradNode(node);
grad_meta->SetStopGradient(false); grad_meta->SetStopGradient(false);
// operator() // operator()
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
et0_vec = {{et0}}; et0_vec = {{et0}};
paddle::experimental::Tensor ret_et0 = node->operator()(et0_vec)[0][0]; paddle::Tensor ret_et0 = node->operator()(et0_vec)[0][0];
auto* ret_et0_ptr = auto* ret_et0_ptr =
std::dynamic_pointer_cast<phi::SelectedRows>(ret_et0.impl()) std::dynamic_pointer_cast<phi::SelectedRows>(ret_et0.impl())
->value() ->value()
.data<float>(); .data<float>();
CHECK_EQ(ret_et0_ptr[0], static_cast<float>(10.0f)); CHECK_EQ(ret_et0_ptr[0], static_cast<float>(10.0f));
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
et1_vec = {{et1}}; et1_vec = {{et1}};
paddle::experimental::Tensor ret_et1 = node->operator()(et1_vec)[0][0]; paddle::Tensor ret_et1 = node->operator()(et1_vec)[0][0];
auto* ret_et1_ptr = auto* ret_et1_ptr =
std::dynamic_pointer_cast<phi::SelectedRows>(ret_et1.impl()) std::dynamic_pointer_cast<phi::SelectedRows>(ret_et1.impl())
->value() ->value()
...@@ -158,7 +152,7 @@ TEST(AccumulationNode, SelectedRowsMerge) { ...@@ -158,7 +152,7 @@ TEST(AccumulationNode, SelectedRowsMerge) {
->value() ->value()
.data<float>()[0], .data<float>()[0],
static_cast<float>(10.0f)); static_cast<float>(10.0f));
paddle::experimental::Tensor* grad = EagerUtils::mutable_grad(input_et); paddle::Tensor* grad = EagerUtils::mutable_grad(input_et);
auto* grad_ptr = std::dynamic_pointer_cast<phi::SelectedRows>(grad->impl()) auto* grad_ptr = std::dynamic_pointer_cast<phi::SelectedRows>(grad->impl())
->value() ->value()
.data<float>(); .data<float>();
...@@ -175,21 +169,20 @@ TEST(AccumulationNode, SelectedRowsAddTensor) { ...@@ -175,21 +169,20 @@ TEST(AccumulationNode, SelectedRowsAddTensor) {
sr0->mutable_value()->Resize(phi::make_ddim({1, 1})); sr0->mutable_value()->Resize(phi::make_ddim({1, 1}));
sr0->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] = sr0->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] =
static_cast<float>(10.0f); static_cast<float>(10.0f);
paddle::experimental::Tensor et0 = paddle::experimental::Tensor(sr0); paddle::Tensor et0 = paddle::Tensor(sr0);
std::shared_ptr<phi::SelectedRows> sr1 = std::shared_ptr<phi::SelectedRows> sr1 =
std::make_shared<phi::SelectedRows>(rows, 1); std::make_shared<phi::SelectedRows>(rows, 1);
sr1->mutable_value()->Resize(phi::make_ddim({1, 1})); sr1->mutable_value()->Resize(phi::make_ddim({1, 1}));
sr1->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] = sr1->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] =
static_cast<float>(20.0f); static_cast<float>(20.0f);
paddle::experimental::Tensor et1 = paddle::experimental::Tensor(sr1); paddle::Tensor et1 = paddle::Tensor(sr1);
std::shared_ptr<phi::DenseTensor> input_dt = std::shared_ptr<phi::DenseTensor> input_dt =
std::make_shared<phi::DenseTensor>( std::make_shared<phi::DenseTensor>(
std::make_unique<paddle::experimental::DefaultAllocator>( std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace()) paddle::platform::CPUPlace())
.get(), .get(),
meta); meta);
paddle::experimental::Tensor input_et = paddle::Tensor input_et = paddle::Tensor(input_dt);
paddle::experimental::Tensor(input_dt);
auto grad_meta = EagerUtils::autograd_meta(&input_et); auto grad_meta = EagerUtils::autograd_meta(&input_et);
// Initialize Grad Tensor // Initialize Grad Tensor
std::shared_ptr<phi::DenseTensor> grad_dt = std::shared_ptr<phi::DenseTensor> grad_dt =
...@@ -206,19 +199,17 @@ TEST(AccumulationNode, SelectedRowsAddTensor) { ...@@ -206,19 +199,17 @@ TEST(AccumulationNode, SelectedRowsAddTensor) {
grad_meta->SetGradNode(node); grad_meta->SetGradNode(node);
grad_meta->SetStopGradient(false); grad_meta->SetStopGradient(false);
// operator() // operator()
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
et0_vec = {{et0}}; et0_vec = {{et0}};
paddle::experimental::Tensor ret_et0 = node->operator()(et0_vec)[0][0]; paddle::Tensor ret_et0 = node->operator()(et0_vec)[0][0];
auto* ret_et0_ptr = auto* ret_et0_ptr =
std::dynamic_pointer_cast<phi::SelectedRows>(ret_et0.impl()) std::dynamic_pointer_cast<phi::SelectedRows>(ret_et0.impl())
->value() ->value()
.data<float>(); .data<float>();
CHECK_EQ(ret_et0_ptr[0], static_cast<float>(10.0f)); CHECK_EQ(ret_et0_ptr[0], static_cast<float>(10.0f));
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
et1_vec = {{et1}}; et1_vec = {{et1}};
paddle::experimental::Tensor ret_et1 = node->operator()(et1_vec)[0][0]; paddle::Tensor ret_et1 = node->operator()(et1_vec)[0][0];
auto* ret_et1_ptr = auto* ret_et1_ptr =
std::dynamic_pointer_cast<phi::SelectedRows>(ret_et1.impl()) std::dynamic_pointer_cast<phi::SelectedRows>(ret_et1.impl())
->value() ->value()
...@@ -229,7 +220,7 @@ TEST(AccumulationNode, SelectedRowsAddTensor) { ...@@ -229,7 +220,7 @@ TEST(AccumulationNode, SelectedRowsAddTensor) {
->value() ->value()
.data<float>()[0], .data<float>()[0],
static_cast<float>(10.0f)); static_cast<float>(10.0f));
paddle::experimental::Tensor* grad = EagerUtils::mutable_grad(input_et); paddle::Tensor* grad = EagerUtils::mutable_grad(input_et);
auto* grad_ptr = auto* grad_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(grad->impl())->data<float>(); std::dynamic_pointer_cast<phi::DenseTensor>(grad->impl())->data<float>();
CHECK_EQ(grad_ptr[0], static_cast<float>(30.0f)); CHECK_EQ(grad_ptr[0], static_cast<float>(30.0f));
...@@ -246,7 +237,7 @@ TEST(AccumulationNode, Tensor) { ...@@ -246,7 +237,7 @@ TEST(AccumulationNode, Tensor) {
meta); meta);
dt0->mutable_data<paddle::platform::float16>( dt0->mutable_data<paddle::platform::float16>(
paddle::platform::CPUPlace())[0] = paddle::platform::float16(10.0f); paddle::platform::CPUPlace())[0] = paddle::platform::float16(10.0f);
paddle::experimental::Tensor et0 = paddle::experimental::Tensor(dt0); paddle::Tensor et0 = paddle::Tensor(dt0);
std::shared_ptr<phi::DenseTensor> dt1 = std::make_shared<phi::DenseTensor>( std::shared_ptr<phi::DenseTensor> dt1 = std::make_shared<phi::DenseTensor>(
std::make_unique<paddle::experimental::DefaultAllocator>( std::make_unique<paddle::experimental::DefaultAllocator>(
...@@ -256,7 +247,7 @@ TEST(AccumulationNode, Tensor) { ...@@ -256,7 +247,7 @@ TEST(AccumulationNode, Tensor) {
dt1->mutable_data<paddle::platform::float16>( dt1->mutable_data<paddle::platform::float16>(
paddle::platform::CPUPlace())[0] = paddle::platform::float16(20.0f); paddle::platform::CPUPlace())[0] = paddle::platform::float16(20.0f);
paddle::experimental::Tensor et1 = paddle::experimental::Tensor(dt1); paddle::Tensor et1 = paddle::Tensor(dt1);
std::shared_ptr<phi::DenseTensor> input_dt = std::shared_ptr<phi::DenseTensor> input_dt =
std::make_shared<phi::DenseTensor>( std::make_shared<phi::DenseTensor>(
...@@ -264,8 +255,7 @@ TEST(AccumulationNode, Tensor) { ...@@ -264,8 +255,7 @@ TEST(AccumulationNode, Tensor) {
paddle::platform::CPUPlace()) paddle::platform::CPUPlace())
.get(), .get(),
meta); meta);
paddle::experimental::Tensor input_et = paddle::Tensor input_et = paddle::Tensor(input_dt);
paddle::experimental::Tensor(input_dt);
auto grad_meta = EagerUtils::autograd_meta(&input_et); auto grad_meta = EagerUtils::autograd_meta(&input_et);
// Initialize Grad Tensor // Initialize Grad Tensor
...@@ -285,19 +275,17 @@ TEST(AccumulationNode, Tensor) { ...@@ -285,19 +275,17 @@ TEST(AccumulationNode, Tensor) {
grad_meta->SetStopGradient(false); grad_meta->SetStopGradient(false);
// operator() // operator()
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
et0_vec = {{et0}}; et0_vec = {{et0}};
paddle::experimental::Tensor ret_et0 = node->operator()(et0_vec)[0][0]; paddle::Tensor ret_et0 = node->operator()(et0_vec)[0][0];
auto* ret_et0_ptr = auto* ret_et0_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(ret_et0.impl()) std::dynamic_pointer_cast<phi::DenseTensor>(ret_et0.impl())
->data<paddle::platform::float16>(); ->data<paddle::platform::float16>();
CHECK_EQ(ret_et0_ptr[0], paddle::platform::float16(10.0f)); CHECK_EQ(ret_et0_ptr[0], paddle::platform::float16(10.0f));
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
et1_vec = {{et1}}; et1_vec = {{et1}};
paddle::experimental::Tensor ret_et1 = node->operator()(et1_vec)[0][0]; paddle::Tensor ret_et1 = node->operator()(et1_vec)[0][0];
auto* ret_et1_ptr = auto* ret_et1_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(ret_et1.impl()) std::dynamic_pointer_cast<phi::DenseTensor>(ret_et1.impl())
...@@ -308,7 +296,7 @@ TEST(AccumulationNode, Tensor) { ...@@ -308,7 +296,7 @@ TEST(AccumulationNode, Tensor) {
CHECK_EQ(std::dynamic_pointer_cast<phi::DenseTensor>(et0.impl()) CHECK_EQ(std::dynamic_pointer_cast<phi::DenseTensor>(et0.impl())
->data<paddle::platform::float16>()[0], ->data<paddle::platform::float16>()[0],
paddle::platform::float16(10.0f)); paddle::platform::float16(10.0f));
paddle::experimental::Tensor* grad = EagerUtils::mutable_grad(input_et); paddle::Tensor* grad = EagerUtils::mutable_grad(input_et);
auto* grad_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(grad->impl()) auto* grad_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(grad->impl())
->data<paddle::platform::float16>(); ->data<paddle::platform::float16>();
CHECK_EQ(grad_ptr[0], paddle::platform::float16(30.0f)); CHECK_EQ(grad_ptr[0], paddle::platform::float16(30.0f));
...@@ -331,7 +319,7 @@ TEST(AccumulationNode, Tensor) { ...@@ -331,7 +319,7 @@ TEST(AccumulationNode, Tensor) {
node->RegisterReduceHook(std::make_shared<egr::CppVoidHook>(reduce_hook_1)); node->RegisterReduceHook(std::make_shared<egr::CppVoidHook>(reduce_hook_1));
// operator() // operator()
paddle::experimental::Tensor _ret = node->operator()(et0_vec)[0][0]; paddle::Tensor _ret = node->operator()(et0_vec)[0][0];
// Check operator() result, should be 36.0 // Check operator() result, should be 36.0
auto* _ret_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(_ret.impl()) auto* _ret_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(_ret.impl())
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#include "paddle/phi/api/lib/utils/allocator.h" #include "paddle/phi/api/lib/utils/allocator.h"
TEST(AutogradMeta, Constructor) { TEST(AutogradMeta, Constructor) {
paddle::experimental::Tensor et1; paddle::Tensor et1;
auto auto_grad = std::make_shared<egr::AutogradMeta>(); auto auto_grad = std::make_shared<egr::AutogradMeta>();
et1.set_autograd_meta(auto_grad); et1.set_autograd_meta(auto_grad);
auto* tmp_auto = static_cast<egr::AutogradMeta*>(et1.get_autograd_meta()); auto* tmp_auto = static_cast<egr::AutogradMeta*>(et1.get_autograd_meta());
...@@ -32,7 +32,7 @@ TEST(AutogradMeta, Constructor) { ...@@ -32,7 +32,7 @@ TEST(AutogradMeta, Constructor) {
} }
TEST(AutogradMeta, MemberFunction) { TEST(AutogradMeta, MemberFunction) {
paddle::experimental::Tensor et1; paddle::Tensor et1;
auto auto_grad = std::make_shared<egr::AutogradMeta>(); auto auto_grad = std::make_shared<egr::AutogradMeta>();
et1.set_autograd_meta(auto_grad); et1.set_autograd_meta(auto_grad);
auto* tmp_auto = static_cast<egr::AutogradMeta*>(et1.get_autograd_meta()); auto* tmp_auto = static_cast<egr::AutogradMeta*>(et1.get_autograd_meta());
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
namespace eager_test { namespace eager_test {
using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta; using AbstractAutogradMeta = paddle::AbstractAutogradMeta;
class AutogradMetaTest : public AbstractAutogradMeta { class AutogradMetaTest : public AbstractAutogradMeta {
public: public:
explicit AutogradMetaTest(int val) : val_(val) {} explicit AutogradMetaTest(int val) : val_(val) {}
...@@ -30,8 +30,8 @@ class AutogradMetaTest : public AbstractAutogradMeta { ...@@ -30,8 +30,8 @@ class AutogradMetaTest : public AbstractAutogradMeta {
}; };
} // namespace eager_test } // namespace eager_test
TEST(Tensor, Constructor) { TEST(Tensor, Constructor) {
paddle::experimental::Tensor et1 = paddle::experimental::Tensor(); paddle::Tensor et1 = paddle::Tensor();
paddle::experimental::Tensor et2 = paddle::experimental::Tensor("et2"); paddle::Tensor et2 = paddle::Tensor("et2");
CHECK_EQ(et1.defined(), false); CHECK_EQ(et1.defined(), false);
CHECK_EQ(et2.name(), "et2"); CHECK_EQ(et2.name(), "et2");
...@@ -46,18 +46,18 @@ TEST(Tensor, Constructor) { ...@@ -46,18 +46,18 @@ TEST(Tensor, Constructor) {
auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace()); auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
dt_ptr[0] = 5.0f; dt_ptr[0] = 5.0f;
dt_ptr[1] = 10.0f; dt_ptr[1] = 10.0f;
paddle::experimental::Tensor et3 = paddle::experimental::Tensor(dt); paddle::Tensor et3 = paddle::Tensor(dt);
auto* et3_ptr = auto* et3_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(et3.impl())->data<float>(); std::dynamic_pointer_cast<phi::DenseTensor>(et3.impl())->data<float>();
CHECK_EQ(et3_ptr[0], 5.0f); CHECK_EQ(et3_ptr[0], 5.0f);
CHECK_EQ(et3_ptr[1], 10.0f); CHECK_EQ(et3_ptr[1], 10.0f);
// copy constructor // copy constructor
paddle::experimental::Tensor et4(et3); paddle::Tensor et4(et3);
auto* et4_ptr = auto* et4_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(et4.impl())->data<float>(); std::dynamic_pointer_cast<phi::DenseTensor>(et4.impl())->data<float>();
CHECK_EQ(et4_ptr[0], 5.0f); CHECK_EQ(et4_ptr[0], 5.0f);
CHECK_EQ(et4_ptr[1], 10.0f); CHECK_EQ(et4_ptr[1], 10.0f);
paddle::experimental::Tensor et5(std::move(et4)); paddle::Tensor et5(std::move(et4));
auto* et5_ptr = auto* et5_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(et5.impl())->data<float>(); std::dynamic_pointer_cast<phi::DenseTensor>(et5.impl())->data<float>();
CHECK_EQ(et5_ptr[0], 5.0f); CHECK_EQ(et5_ptr[0], 5.0f);
...@@ -65,7 +65,7 @@ TEST(Tensor, Constructor) { ...@@ -65,7 +65,7 @@ TEST(Tensor, Constructor) {
} }
TEST(Tensor, MemberFunction) { TEST(Tensor, MemberFunction) {
paddle::experimental::Tensor et3; paddle::Tensor et3;
phi::DenseTensorMeta meta = phi::DenseTensorMeta meta =
phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 2})); phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 2}));
std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>( std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>(
...@@ -97,7 +97,7 @@ TEST(Tensor, MemberFunction) { ...@@ -97,7 +97,7 @@ TEST(Tensor, MemberFunction) {
std::dynamic_pointer_cast<phi::DenseTensor>(et3.impl())->data<float>(); std::dynamic_pointer_cast<phi::DenseTensor>(et3.impl())->data<float>();
CHECK_EQ(dt3_ptr[0], 5.0f); CHECK_EQ(dt3_ptr[0], 5.0f);
CHECK_EQ(dt3_ptr[1], 10.0f); CHECK_EQ(dt3_ptr[1], 10.0f);
paddle::experimental::Tensor et4 = et3; paddle::Tensor et4 = et3;
VLOG(6) << "copy ="; VLOG(6) << "copy =";
CHECK(et4.initialized() == true); CHECK(et4.initialized() == true);
auto* dt4_ptr = auto* dt4_ptr =
...@@ -105,7 +105,7 @@ TEST(Tensor, MemberFunction) { ...@@ -105,7 +105,7 @@ TEST(Tensor, MemberFunction) {
CHECK_EQ(dt4_ptr[0], 5.0f); CHECK_EQ(dt4_ptr[0], 5.0f);
CHECK_EQ(dt4_ptr[1], 10.0f); CHECK_EQ(dt4_ptr[1], 10.0f);
VLOG(6) << "move ="; VLOG(6) << "move =";
paddle::experimental::Tensor et5 = std::move(et4); paddle::Tensor et5 = std::move(et4);
auto* dt5_ptr = auto* dt5_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(et5.impl())->data<float>(); std::dynamic_pointer_cast<phi::DenseTensor>(et5.impl())->data<float>();
CHECK_EQ(dt5_ptr[0], 5.0f); CHECK_EQ(dt5_ptr[0], 5.0f);
...@@ -119,7 +119,7 @@ TEST(Tensor, MemberFunction) { ...@@ -119,7 +119,7 @@ TEST(Tensor, MemberFunction) {
} }
TEST(EagerVariable, Constructor) { TEST(EagerVariable, Constructor) {
paddle::experimental::Tensor t3; paddle::Tensor t3;
phi::DenseTensorMeta meta = phi::DenseTensorMeta meta =
phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 2})); phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 2}));
std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>( std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>(
...@@ -142,7 +142,7 @@ TEST(EagerVariable, Constructor) { ...@@ -142,7 +142,7 @@ TEST(EagerVariable, Constructor) {
CHECK_EQ(et3.Var().Get<phi::DenseTensor>().data<float>()[0], 5.0f); CHECK_EQ(et3.Var().Get<phi::DenseTensor>().data<float>()[0], 5.0f);
CHECK_EQ(et3.Var().Get<phi::DenseTensor>().data<float>()[1], 10.0f); CHECK_EQ(et3.Var().Get<phi::DenseTensor>().data<float>()[1], 10.0f);
VLOG(6) << "SyncToTensor"; VLOG(6) << "SyncToTensor";
paddle::experimental::Tensor t4; paddle::Tensor t4;
t4.set_impl(et3.GetTensorBase()); t4.set_impl(et3.GetTensorBase());
CHECK(t4.initialized() == true); CHECK(t4.initialized() == true);
VLOG(6) << "Check Tensor"; VLOG(6) << "Check Tensor";
...@@ -156,7 +156,7 @@ TEST(EagerVariable, Constructor) { ...@@ -156,7 +156,7 @@ TEST(EagerVariable, Constructor) {
VLOG(6) << "Check Tensor Copy_"; VLOG(6) << "Check Tensor Copy_";
std::vector<int64_t> rows = {1, 2}; std::vector<int64_t> rows = {1, 2};
std::vector<int64_t> dims = {2}; std::vector<int64_t> dims = {2};
paddle::experimental::Tensor t7(std::make_shared<phi::SelectedRows>(rows, 2)); paddle::Tensor t7(std::make_shared<phi::SelectedRows>(rows, 2));
std::dynamic_pointer_cast<phi::SelectedRows>(t7.impl()) std::dynamic_pointer_cast<phi::SelectedRows>(t7.impl())
->mutable_value() ->mutable_value()
->Resize(phi::make_ddim(dims)); ->Resize(phi::make_ddim(dims));
...@@ -166,11 +166,11 @@ TEST(EagerVariable, Constructor) { ...@@ -166,11 +166,11 @@ TEST(EagerVariable, Constructor) {
dt7_tmp_ptr[0] = 6.0f; dt7_tmp_ptr[0] = 6.0f;
dt7_tmp_ptr[1] = 11.0f; dt7_tmp_ptr[1] = 11.0f;
paddle::experimental::Tensor t8; paddle::Tensor t8;
paddle::experimental::Tensor t5; paddle::Tensor t5;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
paddle::experimental::Tensor t6; paddle::Tensor t6;
paddle::experimental::Tensor t9; paddle::Tensor t9;
VLOG(6) << "Check Tensor Copy_ Selected Rows"; VLOG(6) << "Check Tensor Copy_ Selected Rows";
t8.copy_(t7, paddle::platform::CUDAPlace(0), false); t8.copy_(t7, paddle::platform::CUDAPlace(0), false);
t9.copy_(t8, paddle::platform::CPUPlace(), false); t9.copy_(t8, paddle::platform::CPUPlace(), false);
...@@ -201,7 +201,7 @@ TEST(EagerVariable, Constructor) { ...@@ -201,7 +201,7 @@ TEST(EagerVariable, Constructor) {
} }
TEST(EagerVariable, DataLayout) { TEST(EagerVariable, DataLayout) {
paddle::experimental::Tensor tensor; paddle::Tensor tensor;
phi::DenseTensorMeta meta = phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::DenseTensorMeta meta = phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({1, 1, 1, 1}), phi::make_ddim({1, 1, 1, 1}),
phi::DataLayout::UNDEFINED); phi::DataLayout::UNDEFINED);
......
...@@ -34,8 +34,7 @@ void TestGradNodeBase(bool is_remove_gradient_hook) { ...@@ -34,8 +34,7 @@ void TestGradNodeBase(bool is_remove_gradient_hook) {
auto grad_test_node0 = std::make_shared<eager_test::GradTestNode>( auto grad_test_node0 = std::make_shared<eager_test::GradTestNode>(
/* val */ 5.0, /* in_num */ 2, /* out_num */ 2); /* val */ 5.0, /* in_num */ 2, /* out_num */ 2);
auto grad_test_node1 = std::make_shared<eager_test::GradTestNode>(); auto grad_test_node1 = std::make_shared<eager_test::GradTestNode>();
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
grads; grads;
phi::DenseTensorMeta meta = phi::DenseTensorMeta meta =
phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1})); phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1}));
...@@ -46,7 +45,7 @@ void TestGradNodeBase(bool is_remove_gradient_hook) { ...@@ -46,7 +45,7 @@ void TestGradNodeBase(bool is_remove_gradient_hook) {
meta); meta);
auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace()); auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
dt_ptr[0] = 5.0f; dt_ptr[0] = 5.0f;
paddle::experimental::Tensor et1(dt); paddle::Tensor et1(dt);
grads = {{et1}}; grads = {{et1}};
VLOG(6) << "Test Grad Node Call"; VLOG(6) << "Test Grad Node Call";
auto res = (*grad_test_node0)(grads); auto res = (*grad_test_node0)(grads);
...@@ -85,9 +84,8 @@ void TestGradNodeBase(bool is_remove_gradient_hook) { ...@@ -85,9 +84,8 @@ void TestGradNodeBase(bool is_remove_gradient_hook) {
CHECK_EQ(grad_test_node2->OutputMeta()[0].size(), size_t(1)); CHECK_EQ(grad_test_node2->OutputMeta()[0].size(), size_t(1));
VLOG(6) << "Test Gradient Hook"; VLOG(6) << "Test Gradient Hook";
auto gradient_hook = [](const paddle::experimental::Tensor& et) auto gradient_hook = [](const paddle::Tensor& et) -> paddle::Tensor {
-> paddle::experimental::Tensor { paddle::Tensor res;
paddle::experimental::Tensor res;
phi::DenseTensorMeta meta = phi::DenseTensorMeta meta =
phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1})); phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1}));
std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>( std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>(
...@@ -133,7 +131,7 @@ TEST(GradNodeInfo, Edge) { ...@@ -133,7 +131,7 @@ TEST(GradNodeInfo, Edge) {
paddle::platform::CPUPlace()) paddle::platform::CPUPlace())
.get(), .get(),
meta); meta);
paddle::experimental::Tensor et1(dt); paddle::Tensor et1(dt);
auto grad_test_node0 = std::make_shared<eager_test::GradTestNode>(5, 2, 2); auto grad_test_node0 = std::make_shared<eager_test::GradTestNode>(5, 2, 2);
auto auto_grad1 = std::make_shared<egr::AutogradMeta>(); auto auto_grad1 = std::make_shared<egr::AutogradMeta>();
......
...@@ -30,9 +30,8 @@ class GradTestNode : public egr::GradNodeBase { ...@@ -30,9 +30,8 @@ class GradTestNode : public egr::GradNodeBase {
: GradNodeBase(in_num, out_num), val_(val) {} : GradNodeBase(in_num, out_num), val_(val) {}
GradTestNode() : GradNodeBase() { val_ = 1.0; } GradTestNode() : GradNodeBase() { val_ = 1.0; }
std::string name() override { return "GradTestNode"; } std::string name() override { return "GradTestNode"; }
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize> operator()(paddle::small_vector<std::vector<paddle::Tensor>,
operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>& grads, // NOLINT egr::kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false, bool create_graph = false,
bool is_new_grad = false) override { bool is_new_grad = false) override {
...@@ -47,9 +46,8 @@ class GradTestNode : public egr::GradNodeBase { ...@@ -47,9 +46,8 @@ class GradTestNode : public egr::GradNodeBase {
meta); meta);
auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace()); auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
dt_ptr[0] = 6.0f; dt_ptr[0] = 6.0f;
paddle::experimental::Tensor et1(dt); paddle::Tensor et1(dt);
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
res = {{et1}}; res = {{et1}};
return res; return res;
} }
......
...@@ -43,10 +43,9 @@ TEST(GradTensorHolder, Constructor) { ...@@ -43,10 +43,9 @@ TEST(GradTensorHolder, Constructor) {
paddle::platform::CPUPlace()) paddle::platform::CPUPlace())
.get(), .get(),
meta); meta);
paddle::experimental::Tensor et = paddle::experimental::Tensor(dt); paddle::Tensor et = paddle::Tensor(dt);
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, kSlotSmallVectorSize>
kSlotSmallVectorSize>
inputs; inputs;
inputs.push_back({et}); inputs.push_back({et});
...@@ -63,7 +62,7 @@ TEST(GradTensorHolder, Interfaces) { ...@@ -63,7 +62,7 @@ TEST(GradTensorHolder, Interfaces) {
.get(), .get(),
meta); meta);
dt0->mutable_data<float>(paddle::platform::CPUPlace())[0] = 10.0; dt0->mutable_data<float>(paddle::platform::CPUPlace())[0] = 10.0;
paddle::experimental::Tensor et0 = paddle::experimental::Tensor(dt0); paddle::Tensor et0 = paddle::Tensor(dt0);
std::shared_ptr<phi::DenseTensor> dt1 = std::make_shared<phi::DenseTensor>( std::shared_ptr<phi::DenseTensor> dt1 = std::make_shared<phi::DenseTensor>(
std::make_unique<paddle::experimental::DefaultAllocator>( std::make_unique<paddle::experimental::DefaultAllocator>(
...@@ -71,7 +70,7 @@ TEST(GradTensorHolder, Interfaces) { ...@@ -71,7 +70,7 @@ TEST(GradTensorHolder, Interfaces) {
.get(), .get(),
meta); meta);
dt1->mutable_data<float>(paddle::platform::CPUPlace())[0] = 20.0; dt1->mutable_data<float>(paddle::platform::CPUPlace())[0] = 20.0;
paddle::experimental::Tensor et1 = paddle::experimental::Tensor(dt1); paddle::Tensor et1 = paddle::Tensor(dt1);
// Constructor empty GradTensorHolder // Constructor empty GradTensorHolder
std::vector<GradSlotMeta> slot_meta(1); std::vector<GradSlotMeta> slot_meta(1);
...@@ -135,8 +134,8 @@ TEST(GradTensorHolder, SelectedRowsMergeAdd) { ...@@ -135,8 +134,8 @@ TEST(GradTensorHolder, SelectedRowsMergeAdd) {
} }
} }
// new 2 phi::Tensor // new 2 phi::Tensor
paddle::experimental::Tensor t1(sr1); paddle::Tensor t1(sr1);
paddle::experimental::Tensor t2(sr2); paddle::Tensor t2(sr2);
// Constructor empty GradTensorHolder // Constructor empty GradTensorHolder
std::vector<GradSlotMeta> slot_meta(1); std::vector<GradSlotMeta> slot_meta(1);
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
TEST(TensorWrapper, Basic) { TEST(TensorWrapper, Basic) {
VLOG(6) << "Test Full reserved"; VLOG(6) << "Test Full reserved";
paddle::experimental::Tensor et1; paddle::Tensor et1;
phi::DenseTensorMeta meta = phi::DenseTensorMeta meta =
phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 2})); phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 2}));
std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>( std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>(
...@@ -50,7 +50,7 @@ TEST(TensorWrapper, Basic) { ...@@ -50,7 +50,7 @@ TEST(TensorWrapper, Basic) {
CHECK_EQ(egr::EagerUtils::OutRankInfo(recover_et1).second, CHECK_EQ(egr::EagerUtils::OutRankInfo(recover_et1).second,
egr::EagerUtils::OutRankInfo(et1).second); egr::EagerUtils::OutRankInfo(et1).second);
VLOG(6) << "Test reconstruct"; VLOG(6) << "Test reconstruct";
paddle::experimental::Tensor et2; paddle::Tensor et2;
phi::DenseTensorMeta meta2 = phi::DenseTensorMeta meta2 =
phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 2})); phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 2}));
std::shared_ptr<phi::DenseTensor> dt2 = std::make_shared<phi::DenseTensor>( std::shared_ptr<phi::DenseTensor> dt2 = std::make_shared<phi::DenseTensor>(
...@@ -78,7 +78,7 @@ TEST(TensorWrapper, Basic) { ...@@ -78,7 +78,7 @@ TEST(TensorWrapper, Basic) {
CHECK_EQ(egr::EagerUtils::OutRankInfo(recover_et2).second, CHECK_EQ(egr::EagerUtils::OutRankInfo(recover_et2).second,
egr::EagerUtils::OutRankInfo(et2).second); egr::EagerUtils::OutRankInfo(et2).second);
// Test Raw recover // Test Raw recover
paddle::experimental::Tensor et3; paddle::Tensor et3;
auto tw2 = egr::TensorWrapper(et3); auto tw2 = egr::TensorWrapper(et3);
CHECK(tw2.recover().initialized() == false); CHECK(tw2.recover().initialized() == false);
} }
...@@ -50,13 +50,12 @@ TEST(Benchmark, EagerScaleCPU) { ...@@ -50,13 +50,12 @@ TEST(Benchmark, EagerScaleCPU) {
for (const std::string& mode : {"Accuracy", "Performance"}) { for (const std::string& mode : {"Accuracy", "Performance"}) {
paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4}); paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4});
paddle::experimental::Tensor tensor = paddle::Tensor tensor = CreateTensorWithValue(ddim,
CreateTensorWithValue(ddim, paddle::platform::CPUPlace(),
paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
phi::DataType::FLOAT32, phi::DataLayout::NCHW,
phi::DataLayout::NCHW, 5.0,
5.0, true);
true);
RetainGradForTensor(tensor); RetainGradForTensor(tensor);
if (mode == "Accuracy") { if (mode == "Accuracy") {
...@@ -90,23 +89,21 @@ TEST(Benchmark, EagerMatmulCPU) { ...@@ -90,23 +89,21 @@ TEST(Benchmark, EagerMatmulCPU) {
for (const std::string& mode : {"Accuracy", "Performance"}) { for (const std::string& mode : {"Accuracy", "Performance"}) {
paddle::framework::DDim ddimX = phi::make_ddim({2, 2}); paddle::framework::DDim ddimX = phi::make_ddim({2, 2});
paddle::experimental::Tensor X = paddle::Tensor X = CreateTensorWithValue(ddimX,
CreateTensorWithValue(ddimX, paddle::platform::CPUPlace(),
paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
phi::DataType::FLOAT32, phi::DataLayout::NCHW,
phi::DataLayout::NCHW, 1.0,
1.0, true);
true);
RetainGradForTensor(X); RetainGradForTensor(X);
paddle::framework::DDim ddimY = phi::make_ddim({2, 2}); paddle::framework::DDim ddimY = phi::make_ddim({2, 2});
paddle::experimental::Tensor Y = paddle::Tensor Y = CreateTensorWithValue(ddimY,
CreateTensorWithValue(ddimY, paddle::platform::CPUPlace(),
paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
phi::DataType::FLOAT32, phi::DataLayout::NCHW,
phi::DataLayout::NCHW, 2.0,
2.0, true);
true);
RetainGradForTensor(Y); RetainGradForTensor(Y);
if (mode == "Accuracy") { if (mode == "Accuracy") {
...@@ -142,23 +139,21 @@ TEST(Benchmark, EagerIntermediateMatmulCPU) { ...@@ -142,23 +139,21 @@ TEST(Benchmark, EagerIntermediateMatmulCPU) {
for (const std::string& mode : {"Accuracy", "Performance"}) { for (const std::string& mode : {"Accuracy", "Performance"}) {
paddle::framework::DDim ddimX = phi::make_ddim({2, 2}); paddle::framework::DDim ddimX = phi::make_ddim({2, 2});
paddle::experimental::Tensor X = paddle::Tensor X = CreateTensorWithValue(ddimX,
CreateTensorWithValue(ddimX, paddle::platform::CPUPlace(),
paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
phi::DataType::FLOAT32, phi::DataLayout::NCHW,
phi::DataLayout::NCHW, 1.0,
1.0, true);
true);
RetainGradForTensor(X); RetainGradForTensor(X);
paddle::framework::DDim ddimY = phi::make_ddim({2, 2}); paddle::framework::DDim ddimY = phi::make_ddim({2, 2});
paddle::experimental::Tensor Y = paddle::Tensor Y = CreateTensorWithValue(ddimY,
CreateTensorWithValue(ddimY, paddle::platform::CPUPlace(),
paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
phi::DataType::FLOAT32, phi::DataLayout::NCHW,
phi::DataLayout::NCHW, 2.0,
2.0, true);
true);
RetainGradForTensor(Y); RetainGradForTensor(Y);
if (mode == "Accuracy") { if (mode == "Accuracy") {
...@@ -194,36 +189,33 @@ TEST(Benchmark, EagerIntermediateMLPCPU) { ...@@ -194,36 +189,33 @@ TEST(Benchmark, EagerIntermediateMLPCPU) {
for (const std::string& mode : {"Accuracy", "Performance"}) { for (const std::string& mode : {"Accuracy", "Performance"}) {
paddle::framework::DDim ddimX = phi::make_ddim({MLP_M, MLP_N}); paddle::framework::DDim ddimX = phi::make_ddim({MLP_M, MLP_N});
paddle::experimental::Tensor X = paddle::Tensor X = CreateTensorWithValue(ddimX,
CreateTensorWithValue(ddimX, paddle::platform::CPUPlace(),
paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
phi::DataType::FLOAT32, phi::DataLayout::NCHW,
phi::DataLayout::NCHW, MLP_X_VAL,
MLP_X_VAL, true);
true);
RetainGradForTensor(X); RetainGradForTensor(X);
std::vector<paddle::experimental::Tensor> Ws; std::vector<paddle::Tensor> Ws;
std::vector<paddle::experimental::Tensor> Bs; std::vector<paddle::Tensor> Bs;
for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { for (size_t i = 0; i < MLP_NUM_LINEAR; i++) {
paddle::framework::DDim ddimW = phi::make_ddim({MLP_N, MLP_K}); paddle::framework::DDim ddimW = phi::make_ddim({MLP_N, MLP_K});
paddle::experimental::Tensor W = paddle::Tensor W = CreateTensorWithValue(ddimW,
CreateTensorWithValue(ddimW, paddle::platform::CPUPlace(),
paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
phi::DataType::FLOAT32, phi::DataLayout::NCHW,
phi::DataLayout::NCHW, MLP_W_VAL,
MLP_W_VAL, true);
true);
RetainGradForTensor(W); RetainGradForTensor(W);
paddle::framework::DDim ddimB = phi::make_ddim({MLP_K}); paddle::framework::DDim ddimB = phi::make_ddim({MLP_K});
paddle::experimental::Tensor B = paddle::Tensor B = CreateTensorWithValue(ddimB,
CreateTensorWithValue(ddimB, paddle::platform::CPUPlace(),
paddle::platform::CPUPlace(), phi::DataType::FLOAT32,
phi::DataType::FLOAT32, phi::DataLayout::NCHW,
phi::DataLayout::NCHW, MLP_B_VAL,
MLP_B_VAL, true);
true);
RetainGradForTensor(B); RetainGradForTensor(B);
Ws.emplace_back(std::move(W)); Ws.emplace_back(std::move(W));
......
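The MLP benchmark test above now builds its input X and the weight/bias vectors Ws and Bs as plain paddle::Tensor. The tail of the test body is elided in this view, so the following fragment is only a hypothetical illustration (not the elided code) of how such vectors are typically handed to the entry point declared in the benchmark_utils.h hunk further down:

// Hypothetical continuation, for illustration only; assumes it sits inside the
// mode loop of the test above, where X, Ws, Bs and mode are all in scope.
if (mode == "Accuracy") {
  benchmark_eager_intermediate_mlp(X, Ws, Bs, true /*accuracy_check*/);
} else {
  benchmark_eager_intermediate_mlp(X, Ws, Bs);  // performance run, no checks
}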
...@@ -50,13 +50,12 @@ TEST(Benchmark, EagerScaleCUDA) { ...@@ -50,13 +50,12 @@ TEST(Benchmark, EagerScaleCUDA) {
for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) { for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4}); paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4});
paddle::experimental::Tensor tensor = paddle::Tensor tensor = CreateTensorWithValue(ddim,
CreateTensorWithValue(ddim, paddle::platform::CUDAPlace(),
paddle::platform::CUDAPlace(), phi::DataType::FLOAT32,
phi::DataType::FLOAT32, phi::DataLayout::NCHW,
phi::DataLayout::NCHW, 5.0 /*value*/,
5.0 /*value*/, true /*is_leaf*/);
true /*is_leaf*/);
RetainGradForTensor(tensor); RetainGradForTensor(tensor);
if (mode == "Accuracy") { if (mode == "Accuracy") {
...@@ -92,23 +91,21 @@ TEST(Benchmark, EagerMatmulCUDA) { ...@@ -92,23 +91,21 @@ TEST(Benchmark, EagerMatmulCUDA) {
for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) { for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
paddle::framework::DDim ddimX = phi::make_ddim({2, 2}); paddle::framework::DDim ddimX = phi::make_ddim({2, 2});
paddle::experimental::Tensor X = paddle::Tensor X = CreateTensorWithValue(ddimX,
CreateTensorWithValue(ddimX, paddle::platform::CUDAPlace(),
paddle::platform::CUDAPlace(), phi::DataType::FLOAT32,
phi::DataType::FLOAT32, phi::DataLayout::NCHW,
phi::DataLayout::NCHW, 1.0,
1.0, true);
true);
RetainGradForTensor(X); RetainGradForTensor(X);
paddle::framework::DDim ddimY = phi::make_ddim({2, 2}); paddle::framework::DDim ddimY = phi::make_ddim({2, 2});
paddle::experimental::Tensor Y = paddle::Tensor Y = CreateTensorWithValue(ddimY,
CreateTensorWithValue(ddimY, paddle::platform::CUDAPlace(),
paddle::platform::CUDAPlace(), phi::DataType::FLOAT32,
phi::DataType::FLOAT32, phi::DataLayout::NCHW,
phi::DataLayout::NCHW, 2.0,
2.0, true);
true);
RetainGradForTensor(Y); RetainGradForTensor(Y);
if (mode == "Accuracy") { if (mode == "Accuracy") {
...@@ -148,23 +145,21 @@ TEST(Benchmark, EagerIntermediateMatmulCUDA) { ...@@ -148,23 +145,21 @@ TEST(Benchmark, EagerIntermediateMatmulCUDA) {
for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) { for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
paddle::framework::DDim ddimX = phi::make_ddim({2, 2}); paddle::framework::DDim ddimX = phi::make_ddim({2, 2});
paddle::experimental::Tensor X = paddle::Tensor X = CreateTensorWithValue(ddimX,
CreateTensorWithValue(ddimX, paddle::platform::CUDAPlace(),
paddle::platform::CUDAPlace(), phi::DataType::FLOAT32,
phi::DataType::FLOAT32, phi::DataLayout::NCHW,
phi::DataLayout::NCHW, 1.0,
1.0, true);
true);
RetainGradForTensor(X); RetainGradForTensor(X);
paddle::framework::DDim ddimY = phi::make_ddim({2, 2}); paddle::framework::DDim ddimY = phi::make_ddim({2, 2});
paddle::experimental::Tensor Y = paddle::Tensor Y = CreateTensorWithValue(ddimY,
CreateTensorWithValue(ddimY, paddle::platform::CUDAPlace(),
paddle::platform::CUDAPlace(), phi::DataType::FLOAT32,
phi::DataType::FLOAT32, phi::DataLayout::NCHW,
phi::DataLayout::NCHW, 2.0,
2.0, true);
true);
RetainGradForTensor(Y); RetainGradForTensor(Y);
if (mode == "Accuracy") { if (mode == "Accuracy") {
...@@ -204,36 +199,33 @@ TEST(Benchmark, EagerIntermediateMLPCUDA) { ...@@ -204,36 +199,33 @@ TEST(Benchmark, EagerIntermediateMLPCUDA) {
for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) { for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) {
paddle::framework::DDim ddimX = phi::make_ddim({MLP_M, MLP_N}); paddle::framework::DDim ddimX = phi::make_ddim({MLP_M, MLP_N});
paddle::experimental::Tensor X = paddle::Tensor X = CreateTensorWithValue(ddimX,
CreateTensorWithValue(ddimX, paddle::platform::CUDAPlace(),
paddle::platform::CUDAPlace(), phi::DataType::FLOAT32,
phi::DataType::FLOAT32, phi::DataLayout::NCHW,
phi::DataLayout::NCHW, MLP_X_VAL,
MLP_X_VAL, true);
true);
RetainGradForTensor(X); RetainGradForTensor(X);
std::vector<paddle::experimental::Tensor> Ws; std::vector<paddle::Tensor> Ws;
std::vector<paddle::experimental::Tensor> Bs; std::vector<paddle::Tensor> Bs;
for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { for (size_t i = 0; i < MLP_NUM_LINEAR; i++) {
paddle::framework::DDim ddimW = phi::make_ddim({MLP_N, MLP_K}); paddle::framework::DDim ddimW = phi::make_ddim({MLP_N, MLP_K});
paddle::experimental::Tensor W = paddle::Tensor W = CreateTensorWithValue(ddimW,
CreateTensorWithValue(ddimW, paddle::platform::CUDAPlace(),
paddle::platform::CUDAPlace(), phi::DataType::FLOAT32,
phi::DataType::FLOAT32, phi::DataLayout::NCHW,
phi::DataLayout::NCHW, MLP_W_VAL,
MLP_W_VAL, true);
true);
RetainGradForTensor(W); RetainGradForTensor(W);
paddle::framework::DDim ddimB = phi::make_ddim({MLP_K}); paddle::framework::DDim ddimB = phi::make_ddim({MLP_K});
paddle::experimental::Tensor B = paddle::Tensor B = CreateTensorWithValue(ddimB,
CreateTensorWithValue(ddimB, paddle::platform::CUDAPlace(),
paddle::platform::CUDAPlace(), phi::DataType::FLOAT32,
phi::DataType::FLOAT32, phi::DataLayout::NCHW,
phi::DataLayout::NCHW, MLP_B_VAL,
MLP_B_VAL, true);
true);
RetainGradForTensor(B); RetainGradForTensor(B);
Ws.emplace_back(std::move(W)); Ws.emplace_back(std::move(W));
......
...@@ -44,9 +44,8 @@ namespace egr { ...@@ -44,9 +44,8 @@ namespace egr {
/* --------------------- */ /* --------------------- */
/* ---- Eager Scale ---- */ /* ---- Eager Scale ---- */
/* --------------------- */ /* --------------------- */
void benchmark_eager_scale(const paddle::experimental::Tensor& tensor, void benchmark_eager_scale(const paddle::Tensor& tensor, bool accuracy_check) {
bool accuracy_check) { paddle::Tensor input_tensor = tensor;
paddle::experimental::Tensor input_tensor = tensor;
float scale = 2.0; float scale = 2.0;
float bias = 3.0; float bias = 3.0;
...@@ -59,7 +58,7 @@ void benchmark_eager_scale(const paddle::experimental::Tensor& tensor, ...@@ -59,7 +58,7 @@ void benchmark_eager_scale(const paddle::experimental::Tensor& tensor,
true /*trace_backward*/); true /*trace_backward*/);
} }
std::vector<paddle::experimental::Tensor> target_tensors = {input_tensor}; std::vector<paddle::Tensor> target_tensors = {input_tensor};
Backward(target_tensors, {}); Backward(target_tensors, {});
if (accuracy_check) { if (accuracy_check) {
...@@ -70,17 +69,17 @@ void benchmark_eager_scale(const paddle::experimental::Tensor& tensor, ...@@ -70,17 +69,17 @@ void benchmark_eager_scale(const paddle::experimental::Tensor& tensor,
} }
} }
void benchmark_eager_matmul(const paddle::experimental::Tensor& X, void benchmark_eager_matmul(const paddle::Tensor& X,
const paddle::experimental::Tensor& Y, const paddle::Tensor& Y,
bool accuracy_check) { bool accuracy_check) {
paddle::experimental::Tensor input_tensor0 = X; paddle::Tensor input_tensor0 = X;
size_t max_num_runs = accuracy_check ? 2 : max_num_benchmark_runs; size_t max_num_runs = accuracy_check ? 2 : max_num_benchmark_runs;
for (size_t i = 0; i < max_num_runs; i++) { for (size_t i = 0; i < max_num_runs; i++) {
input_tensor0 = matmul_ad_func(input_tensor0, Y, false, false); input_tensor0 = matmul_ad_func(input_tensor0, Y, false, false);
} }
std::vector<paddle::experimental::Tensor> target_tensors = {input_tensor0}; std::vector<paddle::Tensor> target_tensors = {input_tensor0};
Backward(target_tensors, {}); Backward(target_tensors, {});
if (accuracy_check) { if (accuracy_check) {
...@@ -95,10 +94,10 @@ void benchmark_eager_matmul(const paddle::experimental::Tensor& X, ...@@ -95,10 +94,10 @@ void benchmark_eager_matmul(const paddle::experimental::Tensor& X,
/* ----------------------------------- */ /* ----------------------------------- */
/* ---- Eager Intermediate Matmul ---- */ /* ---- Eager Intermediate Matmul ---- */
/* ----------------------------------- */ /* ----------------------------------- */
void benchmark_eager_intermediate_matmul(const paddle::experimental::Tensor& X, void benchmark_eager_intermediate_matmul(const paddle::Tensor& X,
const paddle::experimental::Tensor& Y, const paddle::Tensor& Y,
bool accuracy_check) { bool accuracy_check) {
paddle::experimental::Tensor input_tensor0 = X; paddle::Tensor input_tensor0 = X;
size_t max_num_runs = accuracy_check ? 2 : max_num_benchmark_runs; size_t max_num_runs = accuracy_check ? 2 : max_num_benchmark_runs;
for (size_t i = 0; i < max_num_runs; i++) { for (size_t i = 0; i < max_num_runs; i++) {
...@@ -106,7 +105,7 @@ void benchmark_eager_intermediate_matmul(const paddle::experimental::Tensor& X, ...@@ -106,7 +105,7 @@ void benchmark_eager_intermediate_matmul(const paddle::experimental::Tensor& X,
input_tensor0, Y, {{"trans_x", false}, {"trans_y", false}}); input_tensor0, Y, {{"trans_x", false}, {"trans_y", false}});
} }
std::vector<paddle::experimental::Tensor> target_tensors = {input_tensor0}; std::vector<paddle::Tensor> target_tensors = {input_tensor0};
Backward(target_tensors, {}); Backward(target_tensors, {});
if (accuracy_check) { if (accuracy_check) {
...@@ -121,24 +120,23 @@ void benchmark_eager_intermediate_matmul(const paddle::experimental::Tensor& X, ...@@ -121,24 +120,23 @@ void benchmark_eager_intermediate_matmul(const paddle::experimental::Tensor& X,
/* -------------------------------- */ /* -------------------------------- */
/* ---- Eager Intermediate MLP ---- */ /* ---- Eager Intermediate MLP ---- */
/* -------------------------------- */ /* -------------------------------- */
void benchmark_eager_intermediate_mlp( void benchmark_eager_intermediate_mlp(const paddle::Tensor& X,
const paddle::experimental::Tensor& X, const std::vector<paddle::Tensor>& Ws,
const std::vector<paddle::experimental::Tensor>& Ws, const std::vector<paddle::Tensor>& Bs,
const std::vector<paddle::experimental::Tensor>& Bs, bool accuracy_check) {
bool accuracy_check) { paddle::Tensor input0 = X;
paddle::experimental::Tensor input0 = X;
for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { for (size_t i = 0; i < MLP_NUM_LINEAR; i++) {
paddle::experimental::Tensor Out = matmul_v2_dygraph_function( paddle::Tensor Out = matmul_v2_dygraph_function(
input0, Ws[i], {{"trans_x", false}, {"trans_y", false}}); input0, Ws[i], {{"trans_x", false}, {"trans_y", false}});
input0 = elementwise_add_dygraph_function(Out, Bs[i], {}); input0 = elementwise_add_dygraph_function(Out, Bs[i], {});
} }
paddle::experimental::Tensor Out = paddle::Tensor Out =
reduce_sum_dygraph_function(input0, {{"reduce_all", true}}); reduce_sum_dygraph_function(input0, {{"reduce_all", true}});
std::vector<paddle::experimental::Tensor> target_tensors = {Out}; std::vector<paddle::Tensor> target_tensors = {Out};
Backward(target_tensors, {}); Backward(target_tensors, {});
if (accuracy_check) { if (accuracy_check) {
......
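A small worked example of the numbers the accuracy branch of benchmark_eager_matmul is effectively checking, based only on the shapes and fill values visible above (illustrative arithmetic, not code from this diff): X is 2x2 filled with 1.0, Y is 2x2 filled with 2.0, and accuracy mode runs two iterations.

// Illustrative arithmetic only:
//   run 1: out[i][j] = 1.0 * 2.0 + 1.0 * 2.0 = 4.0
//   run 2: out[i][j] = 4.0 * 2.0 + 4.0 * 2.0 = 16.0
// The elided accuracy check compares the final output (and the gradients
// produced by Backward) against constants derived in this way.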
...@@ -48,23 +48,22 @@ inline std::unordered_map<std::string, float> compute_mlp_expected_results() { ...@@ -48,23 +48,22 @@ inline std::unordered_map<std::string, float> compute_mlp_expected_results() {
} }
/* ---- Eager Scale ---- */ /* ---- Eager Scale ---- */
void benchmark_eager_scale(const paddle::experimental::Tensor& tensor, void benchmark_eager_scale(const paddle::Tensor& tensor,
bool accuracy_check = false); bool accuracy_check = false);
/* ---- Eager MatMul ---- */ /* ---- Eager MatMul ---- */
void benchmark_eager_matmul(const paddle::experimental::Tensor& X, void benchmark_eager_matmul(const paddle::Tensor& X,
const paddle::experimental::Tensor& Y, const paddle::Tensor& Y,
bool accuracy_check = false); bool accuracy_check = false);
void benchmark_eager_intermediate_matmul(const paddle::experimental::Tensor& X, void benchmark_eager_intermediate_matmul(const paddle::Tensor& X,
const paddle::experimental::Tensor& Y, const paddle::Tensor& Y,
bool accuracy_check = false); bool accuracy_check = false);
void benchmark_eager_intermediate_mlp( void benchmark_eager_intermediate_mlp(const paddle::Tensor& X,
const paddle::experimental::Tensor& X, const std::vector<paddle::Tensor>& Ws,
const std::vector<paddle::experimental::Tensor>& Ws, const std::vector<paddle::Tensor>& Bs,
const std::vector<paddle::experimental::Tensor>& Bs, bool accuracy_check = false);
bool accuracy_check = false);
} // namespace egr } // namespace egr
......
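The header hunk above is the public surface of these benchmarks after the rename: every entry point now takes plain paddle::Tensor. A minimal, hypothetical driver fragment (assuming the same includes and helpers as the benchmark test sources earlier in this diff) shows how the scale benchmark is invoked with the renamed type:

// Hypothetical driver fragment, mirroring the Accuracy branch of the CPU scale
// test shown at the top of this diff.
paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4});
paddle::Tensor tensor =
    egr_utils_api::CreateTensorWithValue(ddim,
                                         paddle::platform::CPUPlace(),
                                         phi::DataType::FLOAT32,
                                         phi::DataLayout::NCHW,
                                         5.0 /*value*/,
                                         true /*is_leaf*/);
egr_utils_api::RetainGradForTensor(tensor);
benchmark_eager_scale(tensor, true /*accuracy_check*/);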
...@@ -42,7 +42,7 @@ TEST(Backward, SingleNodeEmptyGrad) { ...@@ -42,7 +42,7 @@ TEST(Backward, SingleNodeEmptyGrad) {
paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32});
// Create Target Tensor // Create Target Tensor
paddle::experimental::Tensor target_tensor = paddle::Tensor target_tensor =
egr_utils_api::CreateTensorWithValue(ddim, egr_utils_api::CreateTensorWithValue(ddim,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
...@@ -50,7 +50,7 @@ TEST(Backward, SingleNodeEmptyGrad) { ...@@ -50,7 +50,7 @@ TEST(Backward, SingleNodeEmptyGrad) {
1.0 /*value*/, 1.0 /*value*/,
false /*is_leaf*/); false /*is_leaf*/);
paddle::experimental::Tensor leaf_tensor; paddle::Tensor leaf_tensor;
{ {
// Create Scale Node // Create Scale Node
auto node0_ptr = std::make_shared<GradNodeScale>(1, 1); auto node0_ptr = std::make_shared<GradNodeScale>(1, 1);
...@@ -77,7 +77,7 @@ TEST(Backward, SingleNodeEmptyGrad) { ...@@ -77,7 +77,7 @@ TEST(Backward, SingleNodeEmptyGrad) {
node0_ptr->SetGradOutMeta({leaf_tensor}, 0); node0_ptr->SetGradOutMeta({leaf_tensor}, 0);
} }
std::vector<paddle::experimental::Tensor> outs = {target_tensor}; std::vector<paddle::Tensor> outs = {target_tensor};
// Run Backward // Run Backward
Backward(outs, {}); Backward(outs, {});
...@@ -90,11 +90,11 @@ TEST(Backward, SingleNodeCustomGrad) { ...@@ -90,11 +90,11 @@ TEST(Backward, SingleNodeCustomGrad) {
eager_test::InitEnv(paddle::platform::CPUPlace()); eager_test::InitEnv(paddle::platform::CPUPlace());
// Prepare Inputs // Prepare Inputs
std::vector<paddle::experimental::Tensor> target_tensors; std::vector<paddle::Tensor> target_tensors;
paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32});
// Create Target Tensor // Create Target Tensor
paddle::experimental::Tensor tensor = paddle::Tensor tensor =
egr_utils_api::CreateTensorWithValue(ddim, egr_utils_api::CreateTensorWithValue(ddim,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
...@@ -103,9 +103,9 @@ TEST(Backward, SingleNodeCustomGrad) { ...@@ -103,9 +103,9 @@ TEST(Backward, SingleNodeCustomGrad) {
false /*is_leaf*/); false /*is_leaf*/);
target_tensors.emplace_back(std::move(tensor)); target_tensors.emplace_back(std::move(tensor));
std::vector<paddle::experimental::Tensor> grad_tensors; std::vector<paddle::Tensor> grad_tensors;
// Create Grad Tensor // Create Grad Tensor
paddle::experimental::Tensor grad_tensor = paddle::Tensor grad_tensor =
egr_utils_api::CreateTensorWithValue(ddim, egr_utils_api::CreateTensorWithValue(ddim,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
...@@ -114,7 +114,7 @@ TEST(Backward, SingleNodeCustomGrad) { ...@@ -114,7 +114,7 @@ TEST(Backward, SingleNodeCustomGrad) {
false /*is_leaf*/); false /*is_leaf*/);
grad_tensors.emplace_back(std::move(grad_tensor)); grad_tensors.emplace_back(std::move(grad_tensor));
paddle::experimental::Tensor leaf_tensor; paddle::Tensor leaf_tensor;
{ {
// Create Scale Node // Create Scale Node
auto node0_ptr = std::make_shared<GradNodeScale>(1, 1); auto node0_ptr = std::make_shared<GradNodeScale>(1, 1);
...@@ -162,11 +162,11 @@ TEST(Backward, LinearNodes) { ...@@ -162,11 +162,11 @@ TEST(Backward, LinearNodes) {
eager_test::InitEnv(paddle::platform::CPUPlace()); eager_test::InitEnv(paddle::platform::CPUPlace());
// Prepare Inputs // Prepare Inputs
std::vector<paddle::experimental::Tensor> target_tensors; std::vector<paddle::Tensor> target_tensors;
paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32});
// Create Target Tensor // Create Target Tensor
paddle::experimental::Tensor tensor = paddle::Tensor tensor =
egr_utils_api::CreateTensorWithValue(ddim, egr_utils_api::CreateTensorWithValue(ddim,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
...@@ -175,7 +175,7 @@ TEST(Backward, LinearNodes) { ...@@ -175,7 +175,7 @@ TEST(Backward, LinearNodes) {
false /*is_leaf*/); false /*is_leaf*/);
target_tensors.emplace_back(std::move(tensor)); target_tensors.emplace_back(std::move(tensor));
paddle::experimental::Tensor leaf_tensor; paddle::Tensor leaf_tensor;
{ {
// Create Node0 // Create Node0
auto node0_ptr = std::make_shared<GradNodeScale>(1, 1); auto node0_ptr = std::make_shared<GradNodeScale>(1, 1);
...@@ -199,7 +199,7 @@ TEST(Backward, LinearNodes) { ...@@ -199,7 +199,7 @@ TEST(Backward, LinearNodes) {
auto_grad_meta->SetSingleOutRankWithSlot(0, 0); auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta->SetStopGradient(false); auto_grad_meta->SetStopGradient(false);
// Connect Node0 -> Node1 via Edge // Connect Node0 -> Node1 via Edge
auto tmp_tensor = paddle::experimental::Tensor(); auto tmp_tensor = paddle::Tensor();
auto* meta0 = EagerUtils::autograd_meta(&tmp_tensor); auto* meta0 = EagerUtils::autograd_meta(&tmp_tensor);
meta0->SetStopGradient(false); meta0->SetStopGradient(false);
meta0->SetSingleOutRankWithSlot(0, 0); meta0->SetSingleOutRankWithSlot(0, 0);
...@@ -241,15 +241,15 @@ TEST(Backward, WithAccumulation) { ...@@ -241,15 +241,15 @@ TEST(Backward, WithAccumulation) {
paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32});
// Create Target Tensor // Create Target Tensor
std::vector<paddle::experimental::Tensor> target_tensors; std::vector<paddle::Tensor> target_tensors;
paddle::experimental::Tensor tensor0 = paddle::Tensor tensor0 =
egr_utils_api::CreateTensorWithValue(ddim, egr_utils_api::CreateTensorWithValue(ddim,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
phi::DataLayout::NCHW, phi::DataLayout::NCHW,
1.0 /*value*/, 1.0 /*value*/,
false /*is_leaf*/); false /*is_leaf*/);
paddle::experimental::Tensor tensor1 = paddle::Tensor tensor1 =
egr_utils_api::CreateTensorWithValue(ddim, egr_utils_api::CreateTensorWithValue(ddim,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
...@@ -260,15 +260,15 @@ TEST(Backward, WithAccumulation) { ...@@ -260,15 +260,15 @@ TEST(Backward, WithAccumulation) {
target_tensors.emplace_back(std::move(tensor1)); target_tensors.emplace_back(std::move(tensor1));
// Create Grad Tensor // Create Grad Tensor
std::vector<paddle::experimental::Tensor> grad_tensors; std::vector<paddle::Tensor> grad_tensors;
paddle::experimental::Tensor grad_tensor0 = paddle::Tensor grad_tensor0 =
egr_utils_api::CreateTensorWithValue(ddim, egr_utils_api::CreateTensorWithValue(ddim,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
phi::DataLayout::NCHW, phi::DataLayout::NCHW,
5.0 /*value*/, 5.0 /*value*/,
false /*is_leaf*/); false /*is_leaf*/);
paddle::experimental::Tensor grad_tensor1 = paddle::Tensor grad_tensor1 =
egr_utils_api::CreateTensorWithValue(ddim, egr_utils_api::CreateTensorWithValue(ddim,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
...@@ -278,7 +278,7 @@ TEST(Backward, WithAccumulation) { ...@@ -278,7 +278,7 @@ TEST(Backward, WithAccumulation) {
grad_tensors.emplace_back(std::move(grad_tensor0)); grad_tensors.emplace_back(std::move(grad_tensor0));
grad_tensors.emplace_back(std::move(grad_tensor1)); grad_tensors.emplace_back(std::move(grad_tensor1));
paddle::experimental::Tensor leaf_tensor; paddle::Tensor leaf_tensor;
{ {
// Create Node0 // Create Node0
auto node0_ptr = std::make_shared<GradNodeScale>(1, 1); auto node0_ptr = std::make_shared<GradNodeScale>(1, 1);
...@@ -309,7 +309,7 @@ TEST(Backward, WithAccumulation) { ...@@ -309,7 +309,7 @@ TEST(Backward, WithAccumulation) {
auto_grad_meta1->SetStopGradient(false); auto_grad_meta1->SetStopGradient(false);
// Connect Node0 -> Node2 via Edge // Connect Node0 -> Node2 via Edge
auto tmp_tensor0 = paddle::experimental::Tensor(); auto tmp_tensor0 = paddle::Tensor();
auto* meta0 = EagerUtils::autograd_meta(&tmp_tensor0); auto* meta0 = EagerUtils::autograd_meta(&tmp_tensor0);
meta0->SetStopGradient(false); meta0->SetStopGradient(false);
meta0->SetSingleOutRankWithSlot(0, 0); meta0->SetSingleOutRankWithSlot(0, 0);
...@@ -317,7 +317,7 @@ TEST(Backward, WithAccumulation) { ...@@ -317,7 +317,7 @@ TEST(Backward, WithAccumulation) {
node0_ptr->SetGradOutMeta(tmp_tensor0, 0); node0_ptr->SetGradOutMeta(tmp_tensor0, 0);
// Connect Node1 -> Node2 via Edge // Connect Node1 -> Node2 via Edge
auto tmp_tensor1 = paddle::experimental::Tensor(); auto tmp_tensor1 = paddle::Tensor();
auto* meta1 = EagerUtils::autograd_meta(&tmp_tensor1); auto* meta1 = EagerUtils::autograd_meta(&tmp_tensor1);
meta1->SetStopGradient(false); meta1->SetStopGradient(false);
meta1->SetSingleOutRankWithSlot(0, 0); meta1->SetSingleOutRankWithSlot(0, 0);
......
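One behavioural detail worth noting in the backward tests above: SingleNodeEmptyGrad passes an empty second argument to Backward, while SingleNodeCustomGrad and WithAccumulation build explicit grad tensors (filled with 10.0, 5.0 and 10.0) to seed the backward pass. A hedged sketch of the two call shapes, using only the API already visible in these hunks (the default-seed behaviour is inferred from the test names, not spelled out in this view):

// Sketch only, not the elided test bodies.
std::vector<paddle::Tensor> outs = {target_tensor};
Backward(outs, {});                      // no explicit grads: default seed
Backward(target_tensors, grad_tensors);  // explicit grads seed each output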
...@@ -35,10 +35,10 @@ namespace egr { ...@@ -35,10 +35,10 @@ namespace egr {
TEST(CrossBatchAccumulation, SingleScaleNode) { TEST(CrossBatchAccumulation, SingleScaleNode) {
eager_test::InitEnv(paddle::platform::CPUPlace()); eager_test::InitEnv(paddle::platform::CPUPlace());
std::vector<paddle::experimental::Tensor> target_tensors; std::vector<paddle::Tensor> target_tensors;
paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32});
paddle::experimental::Tensor tensor = paddle::Tensor tensor =
egr_utils_api::CreateTensorWithValue(ddim, egr_utils_api::CreateTensorWithValue(ddim,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
...@@ -46,9 +46,9 @@ TEST(CrossBatchAccumulation, SingleScaleNode) { ...@@ -46,9 +46,9 @@ TEST(CrossBatchAccumulation, SingleScaleNode) {
1.0 /*value*/, 1.0 /*value*/,
false /*is_leaf*/); false /*is_leaf*/);
target_tensors.emplace_back(std::move(tensor)); target_tensors.emplace_back(std::move(tensor));
paddle::experimental::Tensor& target_tensor = target_tensors[0]; paddle::Tensor& target_tensor = target_tensors[0];
paddle::experimental::Tensor leaf_tensor = paddle::experimental::Tensor(); paddle::Tensor leaf_tensor = paddle::Tensor();
auto scale_node_ptr = std::make_shared<GradNodeScale>(1, 1); auto scale_node_ptr = std::make_shared<GradNodeScale>(1, 1);
scale_node_ptr->SetAttributes_scale(5.0 /*scale*/); scale_node_ptr->SetAttributes_scale(5.0 /*scale*/);
......
...@@ -38,7 +38,7 @@ TEST(EagerUtils, AutoGradMeta) { ...@@ -38,7 +38,7 @@ TEST(EagerUtils, AutoGradMeta) {
.get(), .get(),
meta); meta);
dt0->mutable_data<float>(paddle::platform::CPUPlace())[0] = 10.0; dt0->mutable_data<float>(paddle::platform::CPUPlace())[0] = 10.0;
paddle::experimental::Tensor et0 = paddle::experimental::Tensor(dt0); paddle::Tensor et0 = paddle::Tensor(dt0);
std::shared_ptr<phi::DenseTensor> dt1 = std::make_shared<phi::DenseTensor>( std::shared_ptr<phi::DenseTensor> dt1 = std::make_shared<phi::DenseTensor>(
std::make_unique<paddle::experimental::DefaultAllocator>( std::make_unique<paddle::experimental::DefaultAllocator>(
...@@ -46,7 +46,7 @@ TEST(EagerUtils, AutoGradMeta) { ...@@ -46,7 +46,7 @@ TEST(EagerUtils, AutoGradMeta) {
.get(), .get(),
meta); meta);
dt1->mutable_data<float>(paddle::platform::CPUPlace())[0] = 20.0; dt1->mutable_data<float>(paddle::platform::CPUPlace())[0] = 20.0;
paddle::experimental::Tensor et1 = paddle::experimental::Tensor(dt1); paddle::Tensor et1 = paddle::Tensor(dt1);
// unsafe_autograd_meta() // unsafe_autograd_meta()
// autograd_meta() // autograd_meta()
...@@ -58,7 +58,7 @@ TEST(EagerUtils, AutoGradMeta) { ...@@ -58,7 +58,7 @@ TEST(EagerUtils, AutoGradMeta) {
CHECK_NOTNULL(unsafe_autograd_meta_after); CHECK_NOTNULL(unsafe_autograd_meta_after);
// NOTE: Since autograd_meta will be copied make sure it's not null // NOTE: Since autograd_meta will be copied make sure it's not null
std::vector<paddle::experimental::Tensor> ets = {et0, et1}; std::vector<paddle::Tensor> ets = {et0, et1};
auto test_node = std::make_shared<eager_test::GradTestNode>(); auto test_node = std::make_shared<eager_test::GradTestNode>();
std::vector<AutogradMeta*> autograd_metas = EagerUtils::autograd_meta(&ets); std::vector<AutogradMeta*> autograd_metas = EagerUtils::autograd_meta(&ets);
...@@ -103,11 +103,10 @@ TEST(EagerUtils, AutoGradMeta) { ...@@ -103,11 +103,10 @@ TEST(EagerUtils, AutoGradMeta) {
} }
template <typename T> template <typename T>
paddle::experimental::Tensor CreateTestCPUTensor( paddle::Tensor CreateTestCPUTensor(T val, const paddle::framework::DDim& ddim) {
T val, const paddle::framework::DDim& ddim) {
phi::DenseTensorMeta meta = phi::DenseTensorMeta meta =
phi::DenseTensorMeta(phi::DataType::FLOAT32, ddim); phi::DenseTensorMeta(phi::DataType::FLOAT32, ddim);
paddle::experimental::Tensor tensor; paddle::Tensor tensor;
std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>( std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>(
std::make_unique<paddle::experimental::DefaultAllocator>( std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace()) paddle::platform::CPUPlace())
...@@ -189,8 +188,8 @@ TEST(EagerUtils, TrySyncToVar) { ...@@ -189,8 +188,8 @@ TEST(EagerUtils, TrySyncToVar) {
TEST(EagerUtils, TrySyncToVars) { TEST(EagerUtils, TrySyncToVars) {
paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4}); paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4});
std::vector<paddle::experimental::Tensor> tensors = { std::vector<paddle::Tensor> tensors = {CreateTestCPUTensor(1.0f, ddim),
CreateTestCPUTensor(1.0f, ddim), CreateTestCPUTensor(2.0f, ddim)}; CreateTestCPUTensor(2.0f, ddim)};
std::vector<std::shared_ptr<egr::EagerVariable>> var_bases = std::vector<std::shared_ptr<egr::EagerVariable>> var_bases =
egr::EagerUtils::TrySyncToVars(tensors); egr::EagerUtils::TrySyncToVars(tensors);
...@@ -231,7 +230,7 @@ TEST(EagerUtils, CreateVars) { ...@@ -231,7 +230,7 @@ TEST(EagerUtils, CreateVars) {
TEST(EagerUtils, GetGradAccumulationNode) { TEST(EagerUtils, GetGradAccumulationNode) {
VLOG(6) << "Check GetGradAccumulationNode"; VLOG(6) << "Check GetGradAccumulationNode";
paddle::experimental::Tensor t0("test_tensor"); paddle::Tensor t0("test_tensor");
ASSERT_EQ(egr::EagerUtils::GetGradAccumulationNode(t0), nullptr); ASSERT_EQ(egr::EagerUtils::GetGradAccumulationNode(t0), nullptr);
auto autograd_ptr0 = egr::EagerUtils::autograd_meta(&t0); auto autograd_ptr0 = egr::EagerUtils::autograd_meta(&t0);
autograd_ptr0->SetStopGradient(true); autograd_ptr0->SetStopGradient(true);
...@@ -252,9 +251,8 @@ TEST(EagerUtils, GetGradAccumulationNode) { ...@@ -252,9 +251,8 @@ TEST(EagerUtils, GetGradAccumulationNode) {
} }
TEST(EagerUtils, FillZeroForEmptyOptionalGradInput) { TEST(EagerUtils, FillZeroForEmptyOptionalGradInput) {
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize> grads = {std::vector<paddle::Tensor>(1)};
grads = {std::vector<paddle::experimental::Tensor>(1)};
paddle::small_vector<std::vector<GradSlotMeta>, egr::kSlotSmallVectorSize> paddle::small_vector<std::vector<GradSlotMeta>, egr::kSlotSmallVectorSize>
slot_metas = {std::vector<GradSlotMeta>(1)}; slot_metas = {std::vector<GradSlotMeta>(1)};
......
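A recurring construction pattern in the utils tests above, condensed here as a sketch that mirrors the hunk (note that only Tensor left the experimental namespace; DefaultAllocator is still spelled paddle::experimental::DefaultAllocator throughout this diff):

// Sketch mirroring the DenseTensor-backed construction in the test above;
// the {2, 2} shape is an arbitrary placeholder.
phi::DenseTensorMeta meta =
    phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({2, 2}));
std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>(
    std::make_unique<paddle::experimental::DefaultAllocator>(
        paddle::platform::CPUPlace())
        .get(),
    meta);
dt->mutable_data<float>(paddle::platform::CPUPlace())[0] = 10.0f;
paddle::Tensor et0 = paddle::Tensor(dt);  // wraps the DenseTensor as impl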
...@@ -35,11 +35,11 @@ TEST(Forward, SingleNode) { ...@@ -35,11 +35,11 @@ TEST(Forward, SingleNode) {
eager_test::InitEnv(paddle::platform::CPUPlace()); eager_test::InitEnv(paddle::platform::CPUPlace());
// Prepare Inputs // Prepare Inputs
std::vector<paddle::experimental::Tensor> target_tensors; std::vector<paddle::Tensor> target_tensors;
paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32});
// Create Target Tensor // Create Target Tensor
paddle::experimental::Tensor t = paddle::Tensor t =
egr_utils_api::CreateTensorWithValue(ddim, egr_utils_api::CreateTensorWithValue(ddim,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
...@@ -47,13 +47,13 @@ TEST(Forward, SingleNode) { ...@@ -47,13 +47,13 @@ TEST(Forward, SingleNode) {
5.0 /*value*/, 5.0 /*value*/,
false /*is_leaf*/); false /*is_leaf*/);
target_tensors.emplace_back(std::move(t)); target_tensors.emplace_back(std::move(t));
paddle::experimental::Tensor& tensor = target_tensors[0]; paddle::Tensor& tensor = target_tensors[0];
EagerUtils::autograd_meta(&tensor)->SetStopGradient(false); EagerUtils::autograd_meta(&tensor)->SetStopGradient(false);
// Run Forward // Run Forward
float scale = 2.0; float scale = 2.0;
float bias = 3.0; float bias = 3.0;
paddle::experimental::Tensor out = egr::scale( paddle::Tensor out = egr::scale(
tensor, scale, bias, true /*bias_after_scale*/, true /*trace_backward*/); tensor, scale, bias, true /*bias_after_scale*/, true /*trace_backward*/);
// Examine Forward Output // Examine Forward Output
...@@ -85,11 +85,11 @@ TEST(Forward, LinearNodes) { ...@@ -85,11 +85,11 @@ TEST(Forward, LinearNodes) {
eager_test::InitEnv(paddle::platform::CPUPlace()); eager_test::InitEnv(paddle::platform::CPUPlace());
// Prepare Inputs // Prepare Inputs
std::vector<paddle::experimental::Tensor> target_tensors; std::vector<paddle::Tensor> target_tensors;
paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32});
// Create Target Tensor // Create Target Tensor
paddle::experimental::Tensor t = paddle::Tensor t =
egr_utils_api::CreateTensorWithValue(ddim, egr_utils_api::CreateTensorWithValue(ddim,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
...@@ -97,22 +97,22 @@ TEST(Forward, LinearNodes) { ...@@ -97,22 +97,22 @@ TEST(Forward, LinearNodes) {
5.0 /*value*/, 5.0 /*value*/,
false /*is_leaf*/); false /*is_leaf*/);
target_tensors.emplace_back(std::move(t)); target_tensors.emplace_back(std::move(t));
paddle::experimental::Tensor& tensor = target_tensors[0]; paddle::Tensor& tensor = target_tensors[0];
EagerUtils::autograd_meta(&tensor)->SetStopGradient(false); EagerUtils::autograd_meta(&tensor)->SetStopGradient(false);
// Run Forward Node 0 // Run Forward Node 0
float scale0 = 2.0; float scale0 = 2.0;
float bias0 = 3.0; float bias0 = 3.0;
paddle::experimental::Tensor out0 = egr::scale(tensor, paddle::Tensor out0 = egr::scale(tensor,
scale0, scale0,
bias0, bias0,
true /*bias_after_scale*/, true /*bias_after_scale*/,
true /*trace_backward*/); true /*trace_backward*/);
// Run Forward Node 1 // Run Forward Node 1
float scale1 = 5.0; float scale1 = 5.0;
float bias1 = 10.0; float bias1 = 10.0;
paddle::experimental::Tensor out1 = egr::scale( paddle::Tensor out1 = egr::scale(
out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/); out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/);
// Examine Forward Output 0 // Examine Forward Output 0
...@@ -171,11 +171,11 @@ TEST(Forward, BranchedNodes) { ...@@ -171,11 +171,11 @@ TEST(Forward, BranchedNodes) {
eager_test::InitEnv(paddle::platform::CPUPlace()); eager_test::InitEnv(paddle::platform::CPUPlace());
// Prepare Inputs // Prepare Inputs
std::vector<paddle::experimental::Tensor> target_tensors; std::vector<paddle::Tensor> target_tensors;
paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32});
// Create Target Tensor // Create Target Tensor
paddle::experimental::Tensor t = paddle::Tensor t =
egr_utils_api::CreateTensorWithValue(ddim, egr_utils_api::CreateTensorWithValue(ddim,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
...@@ -183,28 +183,28 @@ TEST(Forward, BranchedNodes) { ...@@ -183,28 +183,28 @@ TEST(Forward, BranchedNodes) {
5.0 /*value*/, 5.0 /*value*/,
false /*is_leaf*/); false /*is_leaf*/);
target_tensors.emplace_back(std::move(t)); target_tensors.emplace_back(std::move(t));
paddle::experimental::Tensor& tensor = target_tensors[0]; paddle::Tensor& tensor = target_tensors[0];
EagerUtils::autograd_meta(&tensor)->SetStopGradient(false); EagerUtils::autograd_meta(&tensor)->SetStopGradient(false);
// Run Forward Node 0 // Run Forward Node 0
float scale0 = 2.0; float scale0 = 2.0;
float bias0 = 3.0; float bias0 = 3.0;
paddle::experimental::Tensor out0 = egr::scale(tensor, paddle::Tensor out0 = egr::scale(tensor,
scale0, scale0,
bias0, bias0,
true /*bias_after_scale*/, true /*bias_after_scale*/,
true /*trace_backward*/); true /*trace_backward*/);
// Run Forward Node 1 // Run Forward Node 1
float scale1 = 5.0; float scale1 = 5.0;
float bias1 = 10.0; float bias1 = 10.0;
paddle::experimental::Tensor out1 = egr::scale( paddle::Tensor out1 = egr::scale(
out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/); out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/);
// Run Forward Node 2 // Run Forward Node 2
float scale2 = 10.0; float scale2 = 10.0;
float bias2 = 20.0; float bias2 = 20.0;
paddle::experimental::Tensor out2 = egr::scale( paddle::Tensor out2 = egr::scale(
out0, scale2, bias2, true /*bias_after_scale*/, true /*trace_backward*/); out0, scale2, bias2, true /*bias_after_scale*/, true /*trace_backward*/);
// Examine Forward Output 0 // Examine Forward Output 0
......
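For orientation, the values that the elided "Examine Forward Output" blocks of these forward tests compare against follow directly from the fill value and the scale/bias pairs shown above (illustrative arithmetic, not the elided code): the input is filled with 5.0, node 0 computes 5.0 * 2.0 + 3.0 = 13.0, node 1 computes 13.0 * 5.0 + 10.0 = 75.0, and in the branched test node 2 reuses out0, giving 13.0 * 10.0 + 20.0 = 150.0.

// Illustrative checks using the comparison helper defined later in this diff;
// the actual elided assertions may differ in form.
eager_test::CompareTensorWithValue<float>(out0, 13.0f);
eager_test::CompareTensorWithValue<float>(out1, 75.0f);
eager_test::CompareTensorWithValue<float>(out2, 150.0f);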
...@@ -44,21 +44,21 @@ TEST(Generated, Sigmoid) { ...@@ -44,21 +44,21 @@ TEST(Generated, Sigmoid) {
// 1. Prepare Input // 1. Prepare Input
paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4}); paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4});
VLOG(6) << "Make Dim"; VLOG(6) << "Make Dim";
paddle::experimental::Tensor tensor = paddle::Tensor tensor =
egr_utils_api::CreateTensorWithValue(ddim, egr_utils_api::CreateTensorWithValue(ddim,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
phi::DataLayout::NCHW, phi::DataLayout::NCHW,
0.0, 0.0,
true); true);
VLOG(6) << "Make paddle::experimental::Tensor"; VLOG(6) << "Make paddle::Tensor";
egr_utils_api::RetainGradForTensor(tensor); egr_utils_api::RetainGradForTensor(tensor);
VLOG(6) << "Retain Grad for Tensor"; VLOG(6) << "Retain Grad for Tensor";
auto output_tensor = sigmoid_dygraph_function(tensor, {}); auto output_tensor = sigmoid_dygraph_function(tensor, {});
VLOG(6) << "Run Backward"; VLOG(6) << "Run Backward";
eager_test::CompareTensorWithValue<float>(output_tensor, 0.5); eager_test::CompareTensorWithValue<float>(output_tensor, 0.5);
std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor}; std::vector<paddle::Tensor> target_tensors = {output_tensor};
VLOG(6) << "Runing Backward"; VLOG(6) << "Runing Backward";
Backward(target_tensors, {}); Backward(target_tensors, {});
...@@ -75,7 +75,7 @@ TEST(Generated, Matmul_v2) { ...@@ -75,7 +75,7 @@ TEST(Generated, Matmul_v2) {
// 1. Prepare Input // 1. Prepare Input
paddle::framework::DDim ddimX = phi::make_ddim({4, 16}); paddle::framework::DDim ddimX = phi::make_ddim({4, 16});
paddle::experimental::Tensor X = paddle::Tensor X =
egr_utils_api::CreateTensorWithValue(ddimX, egr_utils_api::CreateTensorWithValue(ddimX,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
...@@ -85,7 +85,7 @@ TEST(Generated, Matmul_v2) { ...@@ -85,7 +85,7 @@ TEST(Generated, Matmul_v2) {
egr_utils_api::RetainGradForTensor(X); egr_utils_api::RetainGradForTensor(X);
paddle::framework::DDim ddimY = phi::make_ddim({16, 20}); paddle::framework::DDim ddimY = phi::make_ddim({16, 20});
paddle::experimental::Tensor Y = paddle::Tensor Y =
egr_utils_api::CreateTensorWithValue(ddimY, egr_utils_api::CreateTensorWithValue(ddimY,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
...@@ -99,7 +99,7 @@ TEST(Generated, Matmul_v2) { ...@@ -99,7 +99,7 @@ TEST(Generated, Matmul_v2) {
eager_test::CompareTensorWithValue<float>(output_tensor, 96); eager_test::CompareTensorWithValue<float>(output_tensor, 96);
std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor}; std::vector<paddle::Tensor> target_tensors = {output_tensor};
Backward(target_tensors, {}); Backward(target_tensors, {});
eager_test::CompareGradTensorWithValue<float>(X, 2.0 * 20); eager_test::CompareGradTensorWithValue<float>(X, 2.0 * 20);
...@@ -115,7 +115,7 @@ TEST(Generated, ElementwiseAdd) { ...@@ -115,7 +115,7 @@ TEST(Generated, ElementwiseAdd) {
// 1. Prepare Input // 1. Prepare Input
paddle::framework::DDim ddimX = phi::make_ddim({4, 16}); paddle::framework::DDim ddimX = phi::make_ddim({4, 16});
paddle::experimental::Tensor X = paddle::Tensor X =
egr_utils_api::CreateTensorWithValue(ddimX, egr_utils_api::CreateTensorWithValue(ddimX,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
...@@ -125,7 +125,7 @@ TEST(Generated, ElementwiseAdd) { ...@@ -125,7 +125,7 @@ TEST(Generated, ElementwiseAdd) {
egr_utils_api::RetainGradForTensor(X); egr_utils_api::RetainGradForTensor(X);
paddle::framework::DDim ddimY = phi::make_ddim({4, 16}); paddle::framework::DDim ddimY = phi::make_ddim({4, 16});
paddle::experimental::Tensor Y = paddle::Tensor Y =
egr_utils_api::CreateTensorWithValue(ddimY, egr_utils_api::CreateTensorWithValue(ddimY,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
...@@ -138,7 +138,7 @@ TEST(Generated, ElementwiseAdd) { ...@@ -138,7 +138,7 @@ TEST(Generated, ElementwiseAdd) {
eager_test::CompareTensorWithValue<float>(output_tensor, 5); eager_test::CompareTensorWithValue<float>(output_tensor, 5);
std::vector<paddle::experimental::Tensor> target_tensors = {output_tensor}; std::vector<paddle::Tensor> target_tensors = {output_tensor};
Backward(target_tensors, {}); Backward(target_tensors, {});
eager_test::CompareGradTensorWithValue<float>(X, 1.0); eager_test::CompareGradTensorWithValue<float>(X, 1.0);
......
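The generated-op checks above also decode cleanly; the tensor fill values sit in elided lines, so the following is an inference from the visible comparison constants rather than a quotation of the test: sigmoid of an all-zero input is 0.5; for Matmul_v2 with X of shape {4, 16} and Y of shape {16, 20}, the check against 96 is consistent with X filled with 3.0 and Y with 2.0, since each output element sums 16 products; the X-gradient check of 2.0 * 20 is Y's fill accumulated over the 20 output columns; and the ElementwiseAdd checks of 5 and 1.0 similarly point at fills of 3.0 and 2.0.

// Illustrative arithmetic only:
//   sigmoid:         1 / (1 + exp(-0.0)) = 0.5
//   matmul_v2:       out[i][j] = sum_k X[i][k] * Y[k][j] = 16 * 3.0 * 2.0 = 96
//   dX[i][k]:        sum_j dOut[i][j] * Y[k][j] = 20 * 1.0 * 2.0 = 2.0 * 20
//   elementwise_add: out = 3.0 + 2.0 = 5.0, with dX = dY = 1.0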
...@@ -83,17 +83,16 @@ TEST(NanInfUtils, Functions) { ...@@ -83,17 +83,16 @@ TEST(NanInfUtils, Functions) {
auto six_tensors = auto six_tensors =
std::make_tuple(tensor, tensor1, tensor2, tensor3, tensor4, tensor5); std::make_tuple(tensor, tensor1, tensor2, tensor3, tensor4, tensor5);
CHECK_NAN_INF(six_tensors); CHECK_NAN_INF(six_tensors);
std::vector<paddle::experimental::Tensor> tensor_vec; std::vector<paddle::Tensor> tensor_vec;
tensor_vec.emplace_back(tensor); tensor_vec.emplace_back(tensor);
tensor_vec.emplace_back(tensor1); tensor_vec.emplace_back(tensor1);
CHECK_NAN_INF(tensor_vec); CHECK_NAN_INF(tensor_vec);
paddle::small_vector<std::vector<paddle::experimental::Tensor>, paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
egr::kSlotSmallVectorSize>
small_vec; small_vec;
small_vec.emplace_back(tensor_vec); small_vec.emplace_back(tensor_vec);
CHECK_NAN_INF(small_vec); CHECK_NAN_INF(small_vec);
// test selected_rows // test selected_rows
paddle::experimental::Tensor tensor_sr; paddle::Tensor tensor_sr;
auto sr = std::make_shared<phi::SelectedRows>(); auto sr = std::make_shared<phi::SelectedRows>();
*sr->mutable_value() = *sr->mutable_value() =
*(static_cast<const phi::DenseTensor*>(tensor.impl().get())); *(static_cast<const phi::DenseTensor*>(tensor.impl().get()));
......
...@@ -33,11 +33,11 @@ TEST(TensorUtils, Test) { ...@@ -33,11 +33,11 @@ TEST(TensorUtils, Test) {
eager_test::InitEnv(paddle::platform::CPUPlace()); eager_test::InitEnv(paddle::platform::CPUPlace());
// Prepare Inputs // Prepare Inputs
std::vector<paddle::experimental::Tensor> target_tensors; std::vector<paddle::Tensor> target_tensors;
paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32});
// Create Target Tensor // Create Target Tensor
paddle::experimental::Tensor t = paddle::Tensor t =
egr_utils_api::CreateTensorWithValue(ddim, egr_utils_api::CreateTensorWithValue(ddim,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
...@@ -45,7 +45,7 @@ TEST(TensorUtils, Test) { ...@@ -45,7 +45,7 @@ TEST(TensorUtils, Test) {
5.0 /*value*/, 5.0 /*value*/,
true /*is_leaf*/); true /*is_leaf*/);
paddle::experimental::Tensor t_grad = paddle::Tensor t_grad =
egr_utils_api::CreateTensorWithValue(ddim, egr_utils_api::CreateTensorWithValue(ddim,
paddle::platform::CPUPlace(), paddle::platform::CPUPlace(),
phi::DataType::FLOAT32, phi::DataType::FLOAT32,
......
...@@ -28,8 +28,7 @@ ...@@ -28,8 +28,7 @@
namespace eager_test { namespace eager_test {
template <typename T> template <typename T>
bool CompareGradTensorWithValue(const paddle::experimental::Tensor& target, bool CompareGradTensorWithValue(const paddle::Tensor& target, T value) {
T value) {
egr::AutogradMeta* meta = egr::EagerUtils::unsafe_autograd_meta(target); egr::AutogradMeta* meta = egr::EagerUtils::unsafe_autograd_meta(target);
auto grad_dense = auto grad_dense =
std::dynamic_pointer_cast<phi::DenseTensor>(meta->Grad().impl()); std::dynamic_pointer_cast<phi::DenseTensor>(meta->Grad().impl());
...@@ -68,8 +67,7 @@ bool CompareGradTensorWithValue(const paddle::experimental::Tensor& target, ...@@ -68,8 +67,7 @@ bool CompareGradTensorWithValue(const paddle::experimental::Tensor& target,
} }
template <typename T> template <typename T>
bool CompareTensorWithValue(const paddle::experimental::Tensor& target, bool CompareTensorWithValue(const paddle::Tensor& target, T value) {
T value) {
// TODO(jiabin): Support Selected Rows later // TODO(jiabin): Support Selected Rows later
auto dense_t = std::dynamic_pointer_cast<phi::DenseTensor>(target.impl()); auto dense_t = std::dynamic_pointer_cast<phi::DenseTensor>(target.impl());
T* ptr = dense_t->data<T>(); T* ptr = dense_t->data<T>();
......
(2 file diffs are collapsed here and not shown.)
...@@ -57,13 +57,12 @@ TEST(Test__SelectedRowsMerge_Test, SelectedRowsMerge) { ...@@ -57,13 +57,12 @@ TEST(Test__SelectedRowsMerge_Test, SelectedRowsMerge) {
} }
} }
// new 2 phi::Tensor // new 2 phi::Tensor
paddle::experimental::Tensor t1(sr1); paddle::Tensor t1(sr1);
paddle::experimental::Tensor t2(sr2); paddle::Tensor t2(sr2);
// call SelectedRowsMerge // call SelectedRowsMerge
auto new_buffer = auto new_buffer =
paddle::imperative::SelectedRowsMerge<paddle::experimental::Tensor>(t1, paddle::imperative::SelectedRowsMerge<paddle::Tensor>(t1, t2);
t2);
auto* new_buffer_tensor = auto* new_buffer_tensor =
static_cast<phi::SelectedRows*>(new_buffer->impl().get()); static_cast<phi::SelectedRows*>(new_buffer->impl().get());
auto* new_buffer_data_sr1 = auto* new_buffer_data_sr1 =
......
(24 file diffs are collapsed here and not shown.)