Unverified commit 69a4a39f authored by W Weilong Wu, committed by GitHub

merge develop (#43995)

Parent 7f958728
......@@ -6,6 +6,7 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
  set(fluid_manual_nodes
      ${fluid_manual_nodes}
      PARENT_SCOPE)
  add_subdirectory(eager_manual)
  set(eager_manual_functions
      ${eager_manual_functions}
......
......@@ -16,6 +16,9 @@
#include "paddle/phi/api/include/tensor.h"
paddle::experimental::Tensor add_n_final_state_dygraph_function(
    const std::vector<paddle::experimental::Tensor>& x);

paddle::experimental::Tensor conv2d_final_state_dygraph_function(
    const paddle::experimental::Tensor& input,
    const paddle::experimental::Tensor& filter,
......
cc_library(
  add_n_fwd_func
  SRCS add_n_fwd_func.cc
  DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
add_dependencies(add_n_fwd_func eager_codegen)
cc_library(
  conv2d_fwd_function
  SRCS conv2d_fwd_function.cc
......@@ -6,5 +13,5 @@ cc_library(
add_dependencies(conv2d_fwd_function eager_codegen)

set(eager_manual_functions
    conv2d_fwd_function
    conv2d_fwd_function add_n_fwd_func
    PARENT_SCOPE)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/eager/amp_utils.h"
#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h"
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/eager_amp_auto_cast.h"
#include "paddle/fluid/eager/nan_inf_utils.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#pragma GCC diagnostic ignored "-Wunused-variable"
DECLARE_bool(check_nan_inf);
paddle::experimental::Tensor add_n_final_state_dygraph_function(
    const std::vector<paddle::experimental::Tensor>& x) {
  // Dygraph Record Event
  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "add_n dygraph", paddle::platform::TracerEventType::Operator, 1);

  // AMP Logic
  if (egr::Controller::Instance().GetAMPLevel() !=
      paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";
    auto op_name = phi::TransToFluidOpName("add_n");
    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
        amp_tensors_vector = {x};

    auto amp_dst_dtype = egr::GetAmpDestDtype(op_name, amp_tensors_vector);

    auto NEW_x = egr::EagerAmpAutoCasts("x", x, amp_dst_dtype, op_name);

    {
      paddle::imperative::AutoCastGuard guard(
          egr::Controller::Instance().GetCurrentTracer(),
          paddle::imperative::AmpLevel::O0);
      return add_n_final_state_dygraph_function(NEW_x);
    }
  }

  // Get Input AutoGradMeta
  std::vector<egr::AutogradMeta*> x_autograd_meta_vec =
      egr::EagerUtils::nullable_autograd_meta(x);
  std::vector<egr::AutogradMeta*>* x_autograd_meta = &x_autograd_meta_vec;

  // Forward API Call
  VLOG(3) << "Final State Running: "
          << "add_n_final_state_dygraph_function";
  auto api_result = paddle::experimental::add_n(x);

  // Check NaN and Inf if needed
  if (FLAGS_check_nan_inf) {
    egr::CheckTensorHasNanOrInf("add_n", api_result);
  }

  // Get Outputs
  auto& out = api_result;

  // Get Output AutoGradMeta
  egr::AutogradMeta* out_autograd_meta = egr::EagerUtils::autograd_meta(&out);
  bool trace_backward = egr::Controller::Instance().HasGrad();
  bool require_any_grad =
      egr::EagerUtils::ComputeRequireGrad(trace_backward, x_autograd_meta);

  // Check Inplace if needed

  // Node Creation
  if (require_any_grad) {
    paddle::platform::RecordEvent node_creation_record_event(
        "add_n node_creation",
        paddle::platform::TracerEventType::OperatorInner,
        1);

    egr::EagerUtils::PassStopGradient(false, out_autograd_meta);

    // Node Construction
    auto grad_node =
        std::shared_ptr<AddNGradNodeFinal>(new AddNGradNodeFinal(1, 1));
    // SetAttributes if needed

    // Set TensorWrappers for Forward Inputs if needed
    grad_node->SetTensorWrapperx(x);
    // SetGradOutMeta & SetEdges
    grad_node->SetGradOutMeta(x, 0);
    // SetOutRank & SetHistory & SetGradInMeta & RetainGrad
    if (out_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(out_autograd_meta, 0);
    }
    if (out_autograd_meta) {
      egr::EagerUtils::SetHistory(out_autograd_meta, grad_node);
    }
    grad_node->SetGradInMeta(out, 0);
    egr::EagerUtils::CheckAndRetainGrad(out);
    // Set TensorWrappers for Forward Outputs if needed
  }

  // Returns
  return out;
}
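Note on the AMP block above: when autocast is enabled (level != O0), the function casts its inputs once, drops the level to O0 with an RAII guard, and re-enters itself, so the rest of the body runs exactly once on the casted tensors. A minimal sketch of that re-entry pattern in plain C++ (AmpLevel, AutoCastGuard, and forward are illustrative stand-ins, not the Paddle API):

```cpp
#include <iostream>

enum class AmpLevel { O0, O1 };
thread_local AmpLevel g_amp_level = AmpLevel::O1;

// RAII guard: switch the autocast level, restore it on scope exit
// (mirrors the role of paddle::imperative::AutoCastGuard).
struct AutoCastGuard {
  AmpLevel saved_;
  explicit AutoCastGuard(AmpLevel level) : saved_(g_amp_level) {
    g_amp_level = level;
  }
  ~AutoCastGuard() { g_amp_level = saved_; }
};

double forward(double x) {
  if (g_amp_level != AmpLevel::O0) {
    double casted_x = x;  // stand-in for egr::EagerAmpAutoCasts
    AutoCastGuard guard(AmpLevel::O0);
    return forward(casted_x);  // re-enter with autocast disabled
  }
  return x * 2.0;  // the real op body runs exactly once, at O0
}

int main() { std::cout << forward(3.0) << "\n"; }  // prints 6
```

The guard restores the previous level when the recursive call returns, so autocast is disabled only for this one re-entry.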
cc_library(
  add_n_node
  SRCS add_n_node.cc
  DEPS ${eager_deps} ${fluid_deps})
cc_library(
  conv2d_nodes
  SRCS conv2d_nodes.cc
  DEPS ${eager_deps} ${fluid_deps})

set(eager_manual_nodes
    conv2d_nodes
    conv2d_nodes add_n_node
    PARENT_SCOPE)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "glog/logging.h"
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/nan_inf_utils.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/api/all.h"
#include "paddle/phi/api/lib/api_custom_impl.h"
DECLARE_bool(check_nan_inf);
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                     egr::kSlotSmallVectorSize>
AddNGradNodeFinal::operator()(
    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>& grads,
    bool create_graph,
    bool is_new_grad) {
  // Fill Zero For GradIn Tensors

  // Apply Gradient Hooks
  auto hooked_grads = ApplyGradientHooks(grads);

  // Collect GradIn Tensors, Attrs and Recovered TensorWrappers
  auto x = egr::EagerUtils::RecoverTensorWrapper(&this->x_);
  auto& out_grad = hooked_grads[0][0];

  // Prepare Grad function call
  const auto& out_metas = OutputMeta();
  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                       egr::kSlotSmallVectorSize>
      returns(1);
  for (int i = 0; i < 1; ++i) {
    out_metas[i].size() == 0 ? returns[i].resize(1)
                             : returns[i].resize(out_metas[i].size());
  }

  std::vector<paddle::experimental::Tensor*> api_output_0;
  api_output_0.reserve(returns[0].size());
  for (size_t i = 0; i < returns[0].size(); ++i) {
    if (out_metas[0].empty() || out_metas[0][i].IsStopGradient()) {
      api_output_0.push_back(nullptr);
    } else {
      api_output_0.push_back(&returns[0][i]);
    }
  }

  // Call grad_api function
  VLOG(3) << "Final State Running: AddNGradNodeFinal";

  // dygraph function
  for (size_t i = 0; i < returns[0].size(); i++) {
    returns[0][i] = ::scale_final_state_dygraph_function(
        out_grad, phi::Scalar(1.0), 0.0, true);
  }

  // Check NaN and Inf if needed
  if (FLAGS_check_nan_inf) {
    egr::CheckTensorHasNanOrInf("add_n_grad", returns);
  }

  if (NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns);
  return returns;
}
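For intuition: add_n sums its inputs element-wise, so the gradient flowing to every input slot is the upstream out_grad unchanged, which is why the loop above emits scale(out_grad, 1.0, 0.0) per slot. A minimal sketch of that arithmetic in plain C++ (std::vector<double> stand-ins, not Paddle tensors):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// add_n sums its inputs element-wise.
std::vector<double> add_n(const std::vector<std::vector<double>>& xs) {
  std::vector<double> out(xs.front().size(), 0.0);
  for (const auto& x : xs)
    for (std::size_t i = 0; i < x.size(); ++i) out[i] += x[i];
  return out;
}

// d(add_n)/d(x_k) is the identity: every input slot receives out_grad
// as-is, i.e. scale(out_grad, 1.0, 0.0).
std::vector<std::vector<double>> add_n_grad(
    std::size_t n_inputs, const std::vector<double>& out_grad) {
  return std::vector<std::vector<double>>(n_inputs, out_grad);
}

int main() {
  assert((add_n({{1, 2}, {3, 4}}) == std::vector<double>{4, 6}));
  auto grads = add_n_grad(2, {1.0, 1.0});
  assert(grads[0] == grads[1] && grads[0][0] == 1.0);
}
```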
......@@ -15,6 +15,7 @@
#pragma once
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/tensor_wrapper.h"
#include "paddle/fluid/imperative/tracer.h"
class Conv2dGradNodeFinal : public egr::GradNodeBase {
 public:
......@@ -180,3 +181,49 @@ class Conv2dDoubleGradNodeFinal : public egr::GradNodeBase {
  int workspace_size_MB_;
  bool exhaustive_search_;
};
class AddNGradNodeFinal : public egr::GradNodeBase {
 public:
  AddNGradNodeFinal() : egr::GradNodeBase() {}
  AddNGradNodeFinal(size_t bwd_in_slot_num, size_t bwd_out_slot_num)
      : egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {}
  ~AddNGradNodeFinal() override = default;

  virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                               egr::kSlotSmallVectorSize>
  operator()(
      paddle::small_vector<std::vector<paddle::experimental::Tensor>,  // NOLINT
                           egr::kSlotSmallVectorSize>& grads,  // NOLINT
      bool create_graph = false,
      bool is_new_grad = false) override;

  std::string name() override { return "AddNGradNodeFinal"; }

  void ClearTensorWrappers() override {
    for (auto& tw : x_) {
      tw.clear();
    }
    SetIsTensorWrappersCleared(true);
  }

  std::shared_ptr<GradNodeBase> Copy() const override {
    auto copied_node =
        std::shared_ptr<AddNGradNodeFinal>(new AddNGradNodeFinal(*this));
    return copied_node;
  }

  // SetTensorWrapperX, SetTensorWrapperY, ...
  void SetTensorWrapperx(const std::vector<paddle::experimental::Tensor>& x) {
    for (const auto& eager_tensor : x) {
      x_.emplace_back(egr::TensorWrapper(eager_tensor, true));
    }
  }

  // SetAttributes

 private:
  // TensorWrappers
  std::vector<egr::TensorWrapper> x_;

  // Attributes
};
......@@ -41,7 +41,7 @@ from codegen_utils import AssertMessage, GetIndent
# and this will be fixed in the future.
inplace_check_blacklist = set(["assign_out_"])
# # --- Black Ops list that does NOT need backward code generation
black_ops_list = ["conv2d", "conv2d_grad", "conv2d_grad_grad"]
black_ops_list = ["conv2d", "conv2d_grad", "conv2d_grad_grad", "add_n"]
###########
......@@ -283,6 +283,7 @@ NODE_H_FILE_TEMPLATE = \
#pragma once
#include "paddle/fluid/eager/tensor_wrapper.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
{}
"""
......@@ -316,6 +317,7 @@ FORWARD_H_FILE_TEMPLATE = \
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/eager/to_static/run_program_op_func.h"
#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h"
{}
{}
"""
......@@ -1648,6 +1650,8 @@ class DygraphForwardAndNodesGenerator(GeneratorBase):
        namespace = self.namespace
        for forward_api_contents in forward_api_list:
            if forward_api_contents['api'] in black_ops_list: continue

            backward_api_contents = self.GetBackwardAPIContents(
                forward_api_contents)
            if backward_api_contents is None: continue
......
......@@ -871,49 +871,6 @@ std::tuple<Tensor, Tensor, Tensor> momentum_impl(
////////////////// Backward(grad) api impls //////////////////////
// TODO(chenweihang): the original sum grad op can support higher-order
// differentiation, but this impl cannot. We need to be able to reuse
// the autograd API here, which is not yet implemented.
// TODO(chenweihang): we should support calling the generated api in
// custom api impls
void add_n_grad_impl(const std::vector<Tensor>& x,
                     const Tensor& out_grad,
                     std::vector<Tensor*> x_grad) {
  auto kernel_key_set = ParseKernelKeyByInputArgs(out_grad);
  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();

  Backend kernel_backend = kernel_key.backend();
  DataLayout kernel_layout = kernel_key.layout();
  DataType kernel_data_type = kernel_key.dtype();

  auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
      "scale", {kernel_backend, kernel_layout, kernel_data_type});
  VLOG(6) << "add_n_grad API kernel key: [" << kernel_backend << ", "
          << kernel_layout << ", " << kernel_data_type << "]";
  VLOG(6) << "add_n_grad API kernel: " << kernel;

  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);

  auto dense_out_grad = PrepareData(out_grad, kernel.InputAt(0), {});

  auto dense_x_grad = SetKernelOutput(&x_grad);

  using kernel_signature = void (*)(const platform::DeviceContext&,
                                    const phi::DenseTensor&,
                                    const phi::Scalar&,
                                    float,
                                    bool,
                                    phi::DenseTensor*);
  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();

  for (auto* dense_x_grad_t : dense_x_grad) {
    phi::MetaTensor meta_out(dense_x_grad_t);
    phi::UnchangedInferMeta(MakeMetaTensor(*dense_out_grad), &meta_out);
    (*kernel_fn)(
        *dev_ctx, *dense_out_grad, phi::Scalar(1.0), 0.0, true, dense_x_grad_t);
  }
}
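The removed impl above hand-rolled phi kernel dispatch: select the "scale" kernel by key, recover its type-erased entry point as a concrete signature, and invoke it once per output tensor. A minimal sketch of that dispatch shape in plain C++ (Registry and ScaleKernel are hypothetical stand-ins, not the phi API):

```cpp
#include <iostream>
#include <map>
#include <string>

// Type-erased kernel entry point, looked up by name.
using ErasedFn = void (*)();

struct Registry {
  std::map<std::string, ErasedFn> kernels;
  ErasedFn SelectOrThrow(const std::string& name) { return kernels.at(name); }
};

// "scale" kernel on plain doubles: *out = x * scale + bias.
void ScaleKernel(const double& x, double scale, double bias, double* out) {
  *out = x * scale + bias;
}

int main() {
  Registry reg;
  reg.kernels["scale"] = reinterpret_cast<ErasedFn>(&ScaleKernel);

  // Recover the concrete signature (the role of GetVariadicKernelFn);
  // round-tripping a function pointer through another function pointer
  // type and back is well-defined.
  using kernel_signature = void (*)(const double&, double, double, double*);
  auto* kernel_fn =
      reinterpret_cast<kernel_signature>(reg.SelectOrThrow("scale"));

  // add_n's grad fans out_grad to every input slot: scale = 1.0, bias = 0.0.
  double out_grad = 2.5;
  double x_grad[3];
  for (double& g : x_grad) (*kernel_fn)(out_grad, 1.0, 0.0, &g);
  std::cout << x_grad[0] << " " << x_grad[1] << " " << x_grad[2] << "\n";
}
```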
std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> batch_norm_impl(
    const Tensor& x,
    const Tensor& scale,
......
......@@ -116,10 +116,6 @@ std::tuple<Tensor, Tensor, Tensor> momentum_impl(
////////////////// Backward(grad) api impls //////////////////////
void add_n_grad_impl(const std::vector<Tensor>& x,
                     const Tensor& out_grad,
                     std::vector<Tensor*> x_grad);

void conv2d_grad_impl(const Tensor& input,
                      const Tensor& filter,
                      const Tensor& out_grad,
......
......@@ -71,13 +71,6 @@
  backward : add_double_grad
  inplace : (out_grad -> x_grad)

- backward_api : add_n_grad
  forward : add_n (Tensor[] x) -> Tensor(out)
  args : (Tensor[] x, Tensor out_grad)
  output : Tensor[](x_grad){x.size()}
  invoke : add_n_grad_impl(x, out_grad, x_grad)
  no_need_buffer : x

- backward_api : add_triple_grad
  forward : add_double_grad (Tensor y, Tensor grad_out, Tensor grad_grad_x, Tensor grad_grad_y, int axis = -1) -> Tensor(grad_grad_out)
  args : (Tensor grad_grad_x, Tensor grad_grad_y, Tensor grad_grad_out_grad, int axis = -1)
......
......@@ -384,6 +384,29 @@ class API_Test_Add_n(unittest.TestCase):
            self.assertEqual(
                (input1.grad.numpy() == expected_grad_result).all(), True)
    def test_add_n_and_add_and_grad(self):
        with fluid.dygraph.guard():
            np_x = np.array([[1, 2, 3], [4, 5, 6]])
            np_y = [[7, 8, 9], [10, 11, 12]]
            np_z = [[1, 1, 1], [1, 1, 1]]
            x = paddle.to_tensor(np_x, dtype='float32', stop_gradient=False)
            y = paddle.to_tensor(np_y, dtype='float32', stop_gradient=False)
            z = paddle.to_tensor(np_z, dtype='float32')

            out1 = x + z
            out2 = y + z
            out = paddle.add_n([out1, out2])

            dx, dy = paddle.grad([out], [x, y], create_graph=True)

            expected_out = np.array([[10., 12., 14.], [16., 18., 20.]])
            expected_dx = np.array([[1, 1, 1], [1, 1, 1]])
            expected_dy = np.array([[1, 1, 1], [1, 1, 1]])

            self.assertTrue(np.allclose(out, expected_out))
            self.assertTrue(np.allclose(dx, expected_dx))
            self.assertTrue(np.allclose(dy, expected_dy))
class TestRaiseSumError(unittest.TestCase):
......
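For reference, the expected constants in test_add_n_and_add_and_grad follow from the chain of adds; a worked check:

```latex
\mathrm{out} = (x+z) + (y+z) = x + y + 2z
  = \begin{bmatrix} 10 & 12 & 14 \\ 16 & 18 & 20 \end{bmatrix},
\qquad
\frac{\partial\,\mathrm{out}_{ij}}{\partial x_{ij}}
  = \frac{\partial\,\mathrm{out}_{ij}}{\partial y_{ij}} = 1 .
```

Since paddle.grad defaults the output gradient to ones, dx and dy are all-ones, matching expected_dx and expected_dy.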