未验证 提交 c5ad3d06 编写于 作者: J Jiabin Yang 提交者: GitHub

Refactor dygraph to eager -- Autograd info (#37406)

* Add EagerTensor and tests

* remove useless enforce

* remove comment in cmake

* support autograd meta

* support grad node info test

* support grad_node_info

* add more edge test

* remove Python.h

* refine error code

* add error type in error msg

* given default null name for tensor
上级 83e55cff
...@@ -12,6 +12,5 @@ add_subdirectory(operators) ...@@ -12,6 +12,5 @@ add_subdirectory(operators)
add_subdirectory(string) add_subdirectory(string)
add_subdirectory(pybind) add_subdirectory(pybind)
add_subdirectory(eager) add_subdirectory(eager)
# NOTE: please add subdirectory inference at last. # NOTE: please add subdirectory inference at last.
add_subdirectory(inference) add_subdirectory(inference)
add_subdirectory(tests) add_subdirectory(tests)
# Library targets for the eager autograd data structures: each one is built
# from a single source file and depends on pten and pten_api.
cc_library(grad_node_info SRCS grad_node_info.cc DEPS pten pten_api)
cc_library(autograd_meta SRCS autograd_meta.cc DEPS pten pten_api)
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/eager/autograd_meta.h"
// This file is intentionally left otherwise empty so that autograd_meta can
// be compiled as a standalone target.
namespace egr {} // namespace egr
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/eager/grad_node_info.h"
namespace egr {
using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta;
/**
*
* AutogradMeta is what record the backward info for tensor. When we run
* computation
* graph eagerly, we can not build a static paddle program like static mode do,
* so we
* need a new method to record forward info to trace backward when we finish all
* forward
* computation. This require our AutogradMeta class record following main
* members
*
* 1. grad_op:
* Grad_op indicate the grad operation of the forward op
*
* 2. grad:
* Grad is the gradient of forward Tensor, which should be compute after
* backward computation
*
* NOTE: grad should only be available when current tensor is a leaf tensor, and
* for non-leaf
* tensor grad is only available while user set `retain_grad` option as `true`.
*
* TODO(jiabin) : support hooks
* 3. hooks:
* Hooks are some computation logic which only attached with backward operation,
* it registered
* by user and run before accumulator.
*
* 4.overrided_stop_gradient_
* This member is used to finish some auto-prune related work, which indicate
* user set stop_gradient
* should overrided the result indicated by framework. All non-parameter
* tensor's stop_gradient
* properties should be true. We will pass stop_gradient when we find one who
* need it.
*
* NOTE: AutogradMeta is inherited from AbstractAutogradMeta which is defined
* in tensor's deps,
* we did this to avoid additional dependency on Autograd. In eager execution,
* we will cast
* AbstractAutogradMeta as AutogradMeta to use it.
*
* **/
// No other AutogradMeta class should be derivated from AbstractAutogradMeta.
// It's only used by
class AutogradMeta : public AbstractAutogradMeta {
public:
explicit AutogradMeta(const Edge& edge = Edge()) {
out_slot_id_ = edge.GetEdgeRankInfo().first;
out_rank_ = edge.GetEdgeRankInfo().second;
grad_node_ = edge.GetMutableGradNode();
}
~AutogradMeta() override = default;
const egr::EagerTensor& Grad() const { return grad_; }
egr::EagerTensor* MutableGrad() { return &grad_; }
void SetGradNode(const std::shared_ptr<GradNodeBase>& grad_node) {
PADDLE_ENFORCE_NOT_NULL(
grad_node.get(),
paddle::platform::errors::InvalidArgument(
"Should Not set NULL as GradNode pointer, since "
"our default Edge and autogradMeta has nullptr for "
"grad node. Set Nullptr will lead error."));
grad_node_ = grad_node;
}
std::shared_ptr<GradNodeBase> GetMutableGradNode() const {
return grad_node_;
}
GradNodeBase* GradNode() const { return grad_node_.get(); }
void SetSingleOutRankWithSlot(size_t slot_id, size_t rank) {
out_slot_id_ = slot_id;
out_rank_ = rank;
}
std::pair</* slot id */ size_t, /* rank in slot */ size_t> OutRankInfo()
const {
return std::make_pair(out_slot_id_, out_rank_);
}
bool IsInitialized() { return grad_node_.get(); }
// TODO(jiabin): This may cause error, since -1 still can indication true;
bool StopGradient() const { return stop_gradient_ != 0; }
int NumericStopGradient() const { return stop_gradient_; }
void SetStopGradient(bool stop_gradient) {
stop_gradient_ = static_cast<int>(stop_gradient);
}
bool Persistable() const { return persistable_; }
void SetPersistable(bool persistable) { persistable_ = persistable; }
private:
// TODO(jiabin) :Should we use pointer instead of object?
egr::EagerTensor grad_;
// GradNodeBase is base class of all grad op which is a
// wrapper for grad op. This class will make grad op easy
// to be traced.
std::shared_ptr<GradNodeBase> grad_node_;
/**
* Why we need slot id here?
* Because in paddle most of our operators inputs and outputs
* are assemble in form of {"slot name", vector<tensor>}.
* So its better for us to set a slot id to fit this format. **/
size_t out_slot_id_;
// output rank of forward op, this is a vital num, since
// we are now trying to make our forward output is as same
// sequence as backward input. In case of tracing backward
// sequence we need to record output rank in slot here.
size_t out_rank_;
// TODO(jiabin) :Support hooks here and store it in AutogradMeta
// Stop gradient flag to indicate should we compute backward
int stop_gradient_{-1};
bool persistable_{false};
// TODO(jiabin) :Support Quantum here and add cache mechanism as
// VarCache defined in VarBase
};
} // namespace egr
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/pten/common/data_type.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/errors.h"
#include "glog/logging.h"
/**
* Implementation of GradNodeBase, Edge and InputBuffer.
**/
namespace egr {
// Sizes the per-slot containers: one meta entry per backward input slot, and
// one meta entry plus one edge list per backward output slot.
GradNodeBase::GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num)
    : adj_edges_(bwd_out_slot_num),  // same slot count as backward outputs
      bwd_out_meta_(bwd_out_slot_num),
      bwd_in_meta_(bwd_in_slot_num) {}
/**
 * Appends one Edge per entry of `metas` to the edge list of slot `slot_id`.
 * Each edge is built from the meta's grad node and its output (slot, rank).
 *
 * Throws InvalidArgument when `slot_id` is out of range or when `metas`
 * contains a null pointer.
 **/
void GradNodeBase::AddEdges(const std::vector<AutogradMeta*>& metas,
                            size_t slot_id) {
  PADDLE_ENFORCE_LT(
      slot_id, adj_edges_.size(),
      paddle::platform::errors::InvalidArgument(
          "Given slot id is out of range of adj_edges outter size, "
          "adj_edges is designed to has the same size of grad "
          "inputs's slot num."));
  auto& slot_edges = adj_edges_[slot_id];
  for (const auto* meta : metas) {
    // Report a null entry explicitly instead of dereferencing it; skipping it
    // silently would shift the rank of every edge that follows.
    PADDLE_ENFORCE_NOT_NULL(
        meta, paddle::platform::errors::InvalidArgument(
                  "AutogradMeta pointer passed to AddEdges should not be "
                  "nullptr."));
    // adj_edges has the same rank as the forward inputs, and records each
    // input's output rank from its pre-op.
    slot_edges.emplace_back(meta->GetMutableGradNode(), meta->OutRankInfo());
  }
}
void GradNodeBase::AddEdges(const AutogradMeta& meta, size_t slot_id) {
PADDLE_ENFORCE_LT(
slot_id, adj_edges_.size(),
paddle::platform::errors::InvalidArgument(
"Given slot id is out of range of adj_edges outter size, "
"adj_edges is designed to has the same size of grad "
"inputs's slot num."));
adj_edges_[slot_id].emplace_back(meta.GetMutableGradNode(),
meta.OutRankInfo());
}
const std::vector<GradSlotMeta>& GradNodeBase::InputMeta() const {
return bwd_in_meta_;
}
const std::vector<GradSlotMeta>& GradNodeBase::OutputMeta() const {
return bwd_out_meta_;
}
/**
 * Initializes the backward-input meta of slot `slot_rank` from the forward
 * outputs `fwd_out`: the slot gets one entry per forward output, and the
 * entry is marked stop-gradient when that output reports StopGradient().
 *
 * Throws InvalidArgument when `slot_rank` is out of range and
 * PreconditionNotMet when the slot was already initialized.
 **/
void GradNodeBase::SetGradInMeta(const std::vector<AutogradMeta*>& fwd_out,
                                 size_t slot_rank) {
  size_t slot_size = fwd_out.size();
  // Compare with `<` against size() rather than `<=` against size() - 1:
  // the latter wraps around to SIZE_MAX when bwd_in_meta_ is empty, which
  // lets every slot_rank through and surfaces as a raw std::out_of_range
  // from at() below instead of the intended InvalidArgument.
  PADDLE_ENFORCE_LT(
      slot_rank, bwd_in_meta_.size(),
      paddle::platform::errors::InvalidArgument(
          "Slot Rank should less equal than bwd_in_meta_ size, since "
          "bwd_in_meta_ is designed to hold as same num as backward "
          "inputs."));
  auto& meta = bwd_in_meta_.at(slot_rank);
  PADDLE_ENFORCE_EQ(meta.IsInitialized(), false,
                    paddle::platform::errors::PreconditionNotMet(
                        "Bwd_in_meta should only be init once, addition "
                        "initialization for it is forbidden. If you got this "
                        "error, it indicates bugs in framework."));
  // Init stop gradient vector before use to avoid push back
  meta.Init(slot_size);
  for (size_t i = 0; i < slot_size; i++) {
    // Entries default to false after Init, so only the "true" case (which
    // also covers a never-set autograd meta) has to be written.
    if (fwd_out[i]->StopGradient()) {
      meta.SetStopGradient(i, true);
    }
  }
}
void GradNodeBase::SetGradInMeta(const AutogradMeta& fwd_out,
size_t slot_rank) {
PADDLE_ENFORCE_LE(
slot_rank, (bwd_in_meta_.size() - 1),
paddle::platform::errors::InvalidArgument(
"Slot Rank should less equal than bwd_in_meta_ size, since "
"bwd_in_meta_ is designed to hold as same num as backward "
"inputs."));
auto& meta = bwd_in_meta_.at(slot_rank);
PADDLE_ENFORCE_EQ(meta.IsInitialized(), false,
paddle::platform::errors::PreconditionNotMet(
"Bwd_in_meta should only be init once, Additional "
"initialization for it is forbidden. If you got this "
"error, it indicates bugs in framework."));
// Init stop gradient vector before use to avoid push back
VLOG(7) << "Init bwd_in_meta_ with slot rank: " << slot_rank;
meta.Init(1);
meta.SetStopGradient(0, fwd_out.StopGradient());
}
/**
 * Initializes the backward-output meta of slot `slot_rank` from the forward
 * inputs `fwd_in`: the slot gets one entry per forward input, and the entry
 * is marked stop-gradient when that input reports StopGradient().
 *
 * Throws InvalidArgument when `slot_rank` is out of range and
 * PreconditionNotMet when the slot was already initialized.
 **/
void GradNodeBase::SetGradOutMeta(const std::vector<AutogradMeta*>& fwd_in,
                                  size_t slot_rank) {
  size_t slot_size = fwd_in.size();
  // Compare with `<` against size() rather than `<=` against size() - 1:
  // the latter wraps around to SIZE_MAX when bwd_out_meta_ is empty, which
  // lets every slot_rank through and surfaces as a raw std::out_of_range
  // from at() below instead of the intended InvalidArgument.
  PADDLE_ENFORCE_LT(
      slot_rank, bwd_out_meta_.size(),
      paddle::platform::errors::InvalidArgument(
          "Slot Rank should less equal than bwd_out_meta_ size, "
          "since bwd_out_meta_ is designed to hold as same num as "
          "backward outputs."));
  auto& meta = bwd_out_meta_.at(slot_rank);
  PADDLE_ENFORCE_EQ(meta.IsInitialized(), false,
                    paddle::platform::errors::PreconditionNotMet(
                        "Bwd_out_meta should only be init once. Additional "
                        "initialization for it is forbidden. If you got this "
                        "error, it indicates bugs in framework."));
  // Init stop gradient vector before use to avoid push back
  meta.Init(slot_size);
  for (size_t i = 0; i < slot_size; i++) {
    // Entries default to false after Init, so only the "true" case (which
    // also covers a never-set autograd meta) has to be written.
    if (fwd_in[i]->StopGradient()) {
      meta.SetStopGradient(i, true);
    }
  }
}
void GradNodeBase::SetGradOutMeta(const AutogradMeta& fwd_in,
size_t slot_rank) {
PADDLE_ENFORCE_LE(
(slot_rank + 1), bwd_out_meta_.size(),
paddle::platform::errors::InvalidArgument(
"Slot Rank should less equal than bwd_out_meta_ size, "
"since bwd_out_meta_ is designed to hold as same num as "
"backward outputs."));
auto& meta = bwd_out_meta_.at(slot_rank);
PADDLE_ENFORCE_EQ(meta.IsInitialized(), false,
paddle::platform::errors::PreconditionNotMet(
"Bwd_out_meta should only be init once. Additional "
"initialization for it is forbidden. If you got this "
"error, it indicates bugs in framework."));
// Init stop gradient vector before use to avoid push back
meta.Init(1);
meta.SetStopGradient(0, fwd_in.StopGradient());
}
// Default meta setup for nodes with exactly one backward input slot and one
// backward output slot: both slots get a single entry with stop_gradient
// left at its default (false).
void GradNodeBase::SetDefaultGradInOutMeta() {
  PADDLE_ENFORCE((bwd_out_meta_.size() == 1) && (bwd_in_meta_.size() == 1),
                 paddle::platform::errors::PreconditionNotMet(
                     "We can only support 1 input and 1 output in default grad "
                     "meta setter, other size of inputs and outputs should "
                     "create with Setter and Getters"));
  // Slot id is 0 and slot size is 1 on both sides.
  bwd_out_meta_.front().Init(1);
  bwd_in_meta_.front().Init(1);
}
const std::vector<std::vector<Edge>>& GradNodeBase::GetEdges() const {
return adj_edges_;
}
// Stores `hook` together with the (slot_id, rank) of the gradient tensor it
// applies to. Hooks are kept in registration order.
void GradNodeBase::RegisterGradientHook(
    size_t slot_id, size_t rank,
    const std::function<egr::EagerTensor(const egr::EagerTensor&)>& hook) {
  gradient_hooks_.emplace_back(slot_id, rank, hook);
}
// Stores a copy of `hook` at the end of the reduce-hook list.
void GradNodeBase::RegisterReduceHook(const std::function<void(void)>& hook) {
  reduce_hooks_.push_back(hook);
}
/**
 * Runs every registered gradient hook on the tensor it targets and returns a
 * result with the same slot/rank layout as `tensors`. Positions that no hook
 * produced a defined, initialized tensor for are filled with the
 * corresponding input tensor.
 **/
std::vector<std::vector<egr::EagerTensor>> GradNodeBase::ApplyGradientHooks(
    const std::vector<std::vector<egr::EagerTensor>>& tensors) {
  std::vector<std::vector<egr::EagerTensor>> outs(tensors.size());

  // Pass 1: apply the hooks in registration order.
  for (auto& hook_entry : gradient_hooks_) {
    size_t slot_id = std::get<0>(hook_entry);
    size_t rank = std::get<1>(hook_entry);
    auto& hook = std::get<2>(hook_entry);

    PADDLE_ENFORCE(slot_id < tensors.size(),
                   paddle::platform::errors::Fatal(
                       "Slot_id from registered hook should be smaller than "
                       "slot size of grad_tensors"));
    PADDLE_ENFORCE(rank < tensors[slot_id].size(),
                   paddle::platform::errors::Fatal(
                       "rank of slot %d from registered hook should be smaller "
                       "than rank size of grad_tensors",
                       slot_id));

    const std::vector<egr::EagerTensor>& slot_in = tensors[slot_id];
    std::vector<egr::EagerTensor>& slot_out = outs[slot_id];
    slot_out.resize(slot_in.size());

    egr::EagerTensor& target = slot_out[rank];
    if (target.defined() && target.initialized()) {
      // TODO(jiabin): Why this?
      // The output position already holds a usable tensor (presumably from
      // an earlier hook on the same slot/rank), so feed that result in.
      target = hook(target);
    } else {
      target = hook(slot_in[rank]);
    }
  }

  // Pass 2: copy through every input that no hook replaced.
  for (size_t slot = 0; slot < outs.size(); ++slot) {
    std::vector<egr::EagerTensor>& slot_out = outs[slot];
    const std::vector<egr::EagerTensor>& slot_in = tensors[slot];
    if (slot_out.empty() && !slot_in.empty()) {
      slot_out.resize(slot_in.size());
    }
    // TODO(Jiabin): Optimize this if we only add hook slot by slot
    for (size_t idx = 0; idx < slot_out.size(); ++idx) {
      egr::EagerTensor& target = slot_out[idx];
      if (target.defined() && target.initialized()) {
        continue;
      }
      target = slot_in[idx];
    }
  }
  return outs;
}
// Invokes every registered reduce hook, in registration order.
void GradNodeBase::ApplyReduceHooks() {
  for (size_t i = 0; i < reduce_hooks_.size(); ++i) {
    reduce_hooks_[i]();
  }
}
} // namespace egr
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/pten/api/all.h"
#include "paddle/pten/include/core.h"
namespace egr {
/**
 * GradNodeBase is the base class of all grad nodes and is what eager
 * execution should use. Most backward autograd members are defined here; for
 * each operator, the grad node should hold its own forward inputs as
 * TensorWrapper.
 *
 * The GradNodeBase is held in autograd_meta, and it is also a member of
 * Edge, which represents an edge of the backward graph.
 *
 * TODO(yangzhanlue): GradNodeBase will also be in charge of getting the
 * correct input from GradOpDescMaker to GradNodeBase.
 *
 * NOTE: GradNodeBase has a "run" method (its call operator, operator()) that
 * should be overridden by the specific derived class. It prepares the
 * backward inputs and the double-backward dependencies, then calls the C++
 * API of the backward kernel functions to finish the backward computation.
 *
 * NOTE: GradNodeBase holds its own inputs and outputs.
 *
 * Edge is defined to describe a dependency of the backward pass. An Edge is
 * the link between two nodes; it should contain a node and the rank of this
 * node (which is used to indicate which input of the grad this edge belongs
 * to).
 * */
class Edge;
class AutogradMeta;
/**
 * GradSlotMeta records forward tensor info for the backward pass. Paddle has
 * many operators whose backward logic depends on whether some specific
 * inputs or outputs are present, so a meta object is needed to record what
 * each slot requires.
 *
 * A slot meta stores one stop-gradient flag per tensor in the slot. Until
 * Init() is called the meta is "not initialized": Size() reports -1 and the
 * flag vector holds a single `false` entry.
 **/
class GradSlotMeta {
 public:
  GradSlotMeta() = default;

  // Marks the meta as initialized for `size` tensors and resizes the flag
  // vector to that length; newly added flags are false.
  void Init(size_t size) {
    size_ = static_cast<int>(size);
    stop_gradient_.resize(size, false);
  }

  // True once Init() has been called.
  bool IsInitialized() const { return size_ != -1; }

  // Returns the stop-gradient flag at `rank`. Bounds-checked like
  // SetStopGradient(): an out-of-range rank throws std::out_of_range instead
  // of reading past the end of the flag vector.
  bool IsStopGradient(size_t rank) const { return stop_gradient_.at(rank); }

  // Number of tensors passed to Init(), or -1 when not initialized.
  int Size() const { return size_; }

  // Sets the stop-gradient flag at `rank`; throws std::out_of_range when
  // `rank` is out of range.
  void SetStopGradient(size_t rank, bool stop_gradient = true) {
    stop_gradient_.at(rank) = stop_gradient;
  }

 private:
  // -1 marks "Init() not called yet".
  int size_{-1};
  // Brace-initialized from an initializer list: one element, `false`.
  std::vector<bool> stop_gradient_{false};
};
/**
 * Base class of all grad nodes. It stores, per slot, the edges to the next
 * grad nodes and the meta info of the backward inputs and outputs, plus the
 * registered gradient and reduce hooks. Derived classes implement
 * operator() with the actual backward computation.
 **/
class GradNodeBase {
 public:
  GradNodeBase() = default;
  // Creates a node with `bwd_in_slot_num` backward input slots and
  // `bwd_out_slot_num` backward output slots.
  GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num);
  // TODO(jiabin): Should we have other constructor here?
  virtual ~GradNodeBase() = default;

  /**
   * operator() is designed to contain the real backward execution logic; it
   * should be overridden by the derived class defined for each operator. It
   * accepts a vector of tensors which contains the grad inputs of the
   * current operator.
   *
   * Note: why are backward inputs and outputs constructed as a vector of
   * vectors of egr::EagerTensor?
   * Since every paddle op is composed in the form {"Slot name", vector<Var>},
   * a vector of vectors is the better choice to fit this format.
   * **/
  virtual std::vector<std::vector<egr::EagerTensor>> operator()(
      const std::vector<std::vector<egr::EagerTensor>>& grads) = 0;

  /**
   * AddEdges is designed to set the input tensors' backward nodes as the
   * current node's edges.
   * This method should be called in forward code and for double-backward
   * dependency computation.
   *
   * It is called slot by slot.
   * **/
  void AddEdges(const std::vector<AutogradMeta*>& metas, size_t slot_id);
  void AddEdges(const AutogradMeta& meta, size_t slot_id);

  /**
   * GetEdges is designed to get all edges of the current node. **/
  const std::vector<std::vector<Edge>>& GetEdges() const;

  /**
   * Get the input meta of the current grad node. **/
  const std::vector<GradSlotMeta>& InputMeta() const;
  /**
   * Get the output meta of the current grad node. **/
  const std::vector<GradSlotMeta>& OutputMeta() const;

  /**
   * Set backward in/out info from forward vars.
   * **/
  void SetGradInMeta(const std::vector<AutogradMeta*>& fwd_out,
                     size_t slot_rank);
  void SetGradInMeta(const AutogradMeta& fwd_out, size_t slot_rank);
  void SetGradOutMeta(const std::vector<AutogradMeta*>& fwd_in,
                      size_t slot_rank);
  void SetGradOutMeta(const AutogradMeta& fwd_in, size_t slot_rank);

  /**
   * Default setter for grad in/out meta; this should be used for some
   * special nodes which will not be created by the user.
   * **/
  void SetDefaultGradInOutMeta();

  /**
   * Register a GradientHook or ReduceHook.
   * **/
  void RegisterGradientHook(
      size_t slot_id, size_t rank,
      const std::function<egr::EagerTensor(const egr::EagerTensor&)>& hook);
  void RegisterReduceHook(const std::function<void(void)>& hook);

  /**
   * Query and apply GradientHooks or ReduceHooks. The two predicates are
   * const so they can be used through a const node.
   * **/
  inline bool GradientHooksRegistered() const {
    return !gradient_hooks_.empty();
  }
  inline bool ReduceHooksRegistered() const { return !reduce_hooks_.empty(); }
  std::vector<std::vector<egr::EagerTensor>> ApplyGradientHooks(
      const std::vector<std::vector<egr::EagerTensor>>& tensors);
  void ApplyReduceHooks();

 private:
  // TODO(jiabin): Use SmallVector instead after merge PR from develop

  // Edges record the backward-related node info, i.e. all edges linked by
  // this grad node.
  // Why vector<vector<Edge>>: edges have the same rank as the bwd outputs.
  std::vector<std::vector<Edge>> adj_edges_;

  // bwd_out_meta_ is used to record grad output info for backward
  std::vector<GradSlotMeta> bwd_out_meta_;

  // bwd_in_meta_ is used to record grad input info for backward
  std::vector<GradSlotMeta> bwd_in_meta_;

  // Gradient hooks.
  // The customer may register a list of hooks which will be called in order
  // during backward. Each entry is a tuple of <slot id, rank, hook>.
  std::vector<std::tuple<
      /* slot id */ size_t, /* rank */ size_t,
      /* hook */ std::function<egr::EagerTensor(const egr::EagerTensor&)>>>
      gradient_hooks_;

  // Reduce hooks, stored in registration order.
  std::vector<std::function<void(void)>> reduce_hooks_;
};
class Edge {
public:
// Default constructor for Edges in order to construct it for AutogradMeta
Edge() : in_slot_id_(0), in_rank_(0), grad_node_(nullptr) {}
// In real use cases we should create Edge from grad node and input rank which
// indicate which edge it is.
// Since we have slot design in operators we will have to locate an edge with
// slot
// and rank.
Edge(const std::shared_ptr<GradNodeBase>& grad_node, size_t in_slot_id,
size_t in_rank)
: in_slot_id_(in_slot_id), in_rank_(in_rank), grad_node_(grad_node) {}
Edge(const std::shared_ptr<GradNodeBase>& grad_node,
const std::pair</* slot_id */ size_t, /* rank */ size_t>& rank_info)
: in_slot_id_(rank_info.first),
in_rank_(rank_info.second),
grad_node_(grad_node) {}
GradNodeBase* GetGradNode() const { return grad_node_.get(); }
std::shared_ptr<GradNodeBase> GetMutableGradNode() const {
return grad_node_;
}
std::pair<size_t, size_t> GetEdgeRankInfo() const {
return std::make_pair(in_slot_id_, in_rank_);
}
void SetEdgeRankInfo(size_t slot_id, size_t in_rank) {
in_slot_id_ = slot_id;
in_rank_ = in_rank;
}
void SetEdgeRankInfo(
const std::pair</* slot_id */ size_t, /* rank */ size_t>& edge_rank) {
in_slot_id_ = edge_rank.first;
in_rank_ = edge_rank.second;
}
// Currently we use grad_node_ to identify if a edge is initialized.
bool IsInitialized() const { return grad_node_.get(); }
private:
size_t in_slot_id_;
size_t in_rank_;
std::shared_ptr<GradNodeBase> grad_node_;
};
} // namespace egr
cc_test(test_egr_ds_eager_tensor SRCS eager_tensor_test.cc DEPS ${eager_deps}) cc_test(test_egr_ds_eager_tensor SRCS eager_tensor_test.cc DEPS ${eager_deps} )
# Unit tests for the autograd meta and grad node info data structures; both
# depend on ${eager_deps} and grad_node_info.
# NOTE(review): the first target name spells "auotgrad"; presumably "autograd"
# was meant - renaming would change the test target name, so confirm first.
cc_test(test_egr_ds_auotgrad_meta SRCS autograd_meta_test.cc DEPS ${eager_deps} grad_node_info)
cc_test(test_egr_ds_grad_node_info SRCS grad_node_info_test.cc DEPS ${eager_deps} grad_node_info)
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h"
#include "paddle/pten/api/lib/utils/allocator.h"
// A default-constructed AutogradMeta attached to a tensor reports output
// slot/rank (0, 0) and is not initialized (no grad node).
TEST(AutogradMeta, Constructor) {
  egr::EagerTensor tensor;
  auto meta_holder = std::make_shared<egr::AutogradMeta>();
  tensor.set_autograd_meta(meta_holder);

  auto* autograd = static_cast<egr::AutogradMeta*>(tensor.get_autograd_meta());
  const auto rank_info = autograd->OutRankInfo();
  CHECK_EQ(rank_info.first, size_t(0));
  CHECK_EQ(rank_info.second, size_t(0));
  CHECK(autograd->IsInitialized() == false);
}
// Exercises the AutogradMeta accessors one group at a time: grad tensor,
// grad node, output slot/rank, stop-gradient flag and persistable flag.
TEST(AutogradMeta, MemberFunction) {
  egr::EagerTensor tensor;
  auto meta_holder = std::make_shared<egr::AutogradMeta>();
  tensor.set_autograd_meta(meta_holder);
  auto* autograd = static_cast<egr::AutogradMeta*>(tensor.get_autograd_meta());

  VLOG(6) << "Test Grad";
  CHECK(autograd->Grad().defined() == false);
  auto* grad = autograd->MutableGrad();

  // Build a 1x2 float dense tensor on CPU holding {5, 10}.
  pten::DenseTensorMeta dense_meta(pten::DataType::FLOAT32,
                                   paddle::framework::make_ddim({1, 2}));
  auto allocator = std::make_shared<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
  std::shared_ptr<pten::DenseTensor> dense =
      std::make_shared<pten::DenseTensor>(allocator, dense_meta);
  auto* values = dense->mutable_data<float>();
  values[0] = 5.0f;
  values[1] = 10.0f;
  grad->set_impl(dense);

  VLOG(6) << "Test Mutable Grad";
  // The tensor written through MutableGrad() must be visible through Grad().
  auto grad_impl =
      std::dynamic_pointer_cast<pten::DenseTensor>(autograd->Grad().impl());
  CHECK_EQ(grad_impl->data<float>()[0], 5.0f);
  CHECK_EQ(grad_impl->data<float>()[1], 10.0f);

  VLOG(6) << "Test IsInitialized";
  CHECK(autograd->IsInitialized() == false);

  VLOG(6) << "Test GradNodeSetter Getter";
  auto node = std::make_shared<eager_test::GradTestNode>();
  autograd->SetGradNode(node);
  CHECK(autograd->IsInitialized() == true);
  // Write through the shared owner, read back through the raw pointer.
  std::dynamic_pointer_cast<eager_test::GradTestNode>(
      autograd->GetMutableGradNode())
      ->val_ = 5.0;
  CHECK_EQ(dynamic_cast<eager_test::GradTestNode*>(autograd->GradNode())->val_,
           5.0);

  VLOG(6) << "Test rank Setter Getter";
  CHECK_EQ(autograd->OutRankInfo().first, size_t(0));
  CHECK_EQ(autograd->OutRankInfo().second, size_t(0));
  autograd->SetSingleOutRankWithSlot(2, 3);
  CHECK_EQ(autograd->OutRankInfo().first, size_t(2));
  CHECK_EQ(autograd->OutRankInfo().second, size_t(3));

  VLOG(6) << "Test stop gradient Setter Getter";
  // -1 is the "never set" value.
  CHECK_EQ(autograd->NumericStopGradient(), -1);
  autograd->SetStopGradient(true);
  CHECK(autograd->StopGradient() == true);

  VLOG(6) << "Test Persistable Setter Getter";
  CHECK(autograd->Persistable() == false);
  autograd->SetPersistable(true);
  CHECK(autograd->Persistable() == true);
}
...@@ -19,9 +19,6 @@ ...@@ -19,9 +19,6 @@
#include "paddle/pten/api/lib/api_declare.h" #include "paddle/pten/api/lib/api_declare.h"
#include "paddle/pten/api/lib/utils/allocator.h" #include "paddle/pten/api/lib/utils/allocator.h"
// TODO(jiabin): remove nolint here!!!
using namespace egr; // NOLINT
namespace eager_test { namespace eager_test {
using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta; using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta;
class AutogradMetaTest : public AbstractAutogradMeta { class AutogradMetaTest : public AbstractAutogradMeta {
...@@ -31,8 +28,8 @@ class AutogradMetaTest : public AbstractAutogradMeta { ...@@ -31,8 +28,8 @@ class AutogradMetaTest : public AbstractAutogradMeta {
}; };
} }
TEST(EagerTensor, Constructor) { TEST(EagerTensor, Constructor) {
EagerTensor et1 = EagerTensor(); egr::EagerTensor et1 = egr::EagerTensor();
EagerTensor et2 = EagerTensor("et2"); egr::EagerTensor et2 = egr::EagerTensor("et2");
CHECK_EQ(et1.defined(), false); CHECK_EQ(et1.defined(), false);
CHECK_EQ(et2.name(), "et2"); CHECK_EQ(et2.name(), "et2");
...@@ -46,18 +43,18 @@ TEST(EagerTensor, Constructor) { ...@@ -46,18 +43,18 @@ TEST(EagerTensor, Constructor) {
auto* dt_ptr = dt->mutable_data<float>(); auto* dt_ptr = dt->mutable_data<float>();
dt_ptr[0] = 5.0f; dt_ptr[0] = 5.0f;
dt_ptr[1] = 10.0f; dt_ptr[1] = 10.0f;
EagerTensor et3 = EagerTensor(dt); egr::EagerTensor et3 = egr::EagerTensor(dt);
auto* et3_ptr = auto* et3_ptr =
std::dynamic_pointer_cast<pten::DenseTensor>(et3.impl())->data<float>(); std::dynamic_pointer_cast<pten::DenseTensor>(et3.impl())->data<float>();
CHECK_EQ(et3_ptr[0], 5.0f); CHECK_EQ(et3_ptr[0], 5.0f);
CHECK_EQ(et3_ptr[1], 10.0f); CHECK_EQ(et3_ptr[1], 10.0f);
// copy constructor // copy constructor
EagerTensor et4(et3); egr::EagerTensor et4(et3);
auto* et4_ptr = auto* et4_ptr =
std::dynamic_pointer_cast<pten::DenseTensor>(et4.impl())->data<float>(); std::dynamic_pointer_cast<pten::DenseTensor>(et4.impl())->data<float>();
CHECK_EQ(et4_ptr[0], 5.0f); CHECK_EQ(et4_ptr[0], 5.0f);
CHECK_EQ(et4_ptr[1], 10.0f); CHECK_EQ(et4_ptr[1], 10.0f);
EagerTensor et5(std::move(et4)); egr::EagerTensor et5(std::move(et4));
auto* et5_ptr = auto* et5_ptr =
std::dynamic_pointer_cast<pten::DenseTensor>(et5.impl())->data<float>(); std::dynamic_pointer_cast<pten::DenseTensor>(et5.impl())->data<float>();
CHECK_EQ(et5_ptr[0], 5.0f); CHECK_EQ(et5_ptr[0], 5.0f);
...@@ -65,7 +62,7 @@ TEST(EagerTensor, Constructor) { ...@@ -65,7 +62,7 @@ TEST(EagerTensor, Constructor) {
} }
TEST(EagerTensor, MemberFunction) { TEST(EagerTensor, MemberFunction) {
EagerTensor et3; egr::EagerTensor et3;
pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DenseTensorMeta meta = pten::DenseTensorMeta(
pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 2})); pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 2}));
std::shared_ptr<pten::DenseTensor> dt = std::make_shared<pten::DenseTensor>( std::shared_ptr<pten::DenseTensor> dt = std::make_shared<pten::DenseTensor>(
...@@ -96,7 +93,7 @@ TEST(EagerTensor, MemberFunction) { ...@@ -96,7 +93,7 @@ TEST(EagerTensor, MemberFunction) {
std::dynamic_pointer_cast<pten::DenseTensor>(et3.impl())->data<float>(); std::dynamic_pointer_cast<pten::DenseTensor>(et3.impl())->data<float>();
CHECK_EQ(dt3_ptr[0], 5.0f); CHECK_EQ(dt3_ptr[0], 5.0f);
CHECK_EQ(dt3_ptr[1], 10.0f); CHECK_EQ(dt3_ptr[1], 10.0f);
EagerTensor et4 = et3; egr::EagerTensor et4 = et3;
VLOG(6) << "copy ="; VLOG(6) << "copy =";
CHECK(et4.initialized() == true); CHECK(et4.initialized() == true);
auto* dt4_ptr = auto* dt4_ptr =
...@@ -104,7 +101,7 @@ TEST(EagerTensor, MemberFunction) { ...@@ -104,7 +101,7 @@ TEST(EagerTensor, MemberFunction) {
CHECK_EQ(dt4_ptr[0], 5.0f); CHECK_EQ(dt4_ptr[0], 5.0f);
CHECK_EQ(dt4_ptr[1], 10.0f); CHECK_EQ(dt4_ptr[1], 10.0f);
VLOG(6) << "move ="; VLOG(6) << "move =";
EagerTensor et5 = std::move(et4); egr::EagerTensor et5 = std::move(et4);
auto* dt5_ptr = auto* dt5_ptr =
std::dynamic_pointer_cast<pten::DenseTensor>(et5.impl())->data<float>(); std::dynamic_pointer_cast<pten::DenseTensor>(et5.impl())->data<float>();
CHECK_EQ(dt5_ptr[0], 5.0f); CHECK_EQ(dt5_ptr[0], 5.0f);
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h"
#include "paddle/pten/api/lib/utils/allocator.h"
// GradSlotMeta starts uninitialized; Init(2) makes it initialized with size
// 2, and SetStopGradient(0) turns on the flag at rank 0.
TEST(GradNodeInfo, GradSlotMeta) {
  egr::GradSlotMeta slot_meta;
  CHECK(slot_meta.IsInitialized() == false);

  VLOG(6) << "Init GradSlotMeta";
  slot_meta.Init(2);
  CHECK(slot_meta.IsInitialized() == true);

  VLOG(6) << "Set SetStopGradient";
  slot_meta.SetStopGradient(0);
  CHECK(slot_meta.IsStopGradient(0) == true);
  CHECK_EQ(slot_meta.Size(), 2);
}
// Exercises GradNodeBase through the GradTestNode helper: calling the node,
// adding edges, setting grad in/out meta, and running gradient and reduce
// hooks. The sections share state on `node0`, `grads` and `et1`, so they must
// run in the order written.
TEST(GradNodeInfo, GradNodeBase) {
  // Reads element 0 of the float buffer of the DenseTensor behind `tensor`.
  auto first_value = [](const egr::EagerTensor& tensor) -> float {
    return std::dynamic_pointer_cast<pten::DenseTensor>(tensor.impl())
        ->data<float>()[0];
  };

  VLOG(6) << "Construct Grad Node";
  auto node0 = std::make_shared<eager_test::GradTestNode>(
      /* val */ 5.0, /* in_num */ 2, /* out_num */ 2);
  auto node1 = std::make_shared<eager_test::GradTestNode>();

  // Incoming gradient: a 1x1 float tensor on CPU whose only element is 5.0.
  pten::DenseTensorMeta dense_meta = pten::DenseTensorMeta(
      pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 1}));
  auto dense = std::make_shared<pten::DenseTensor>(
      std::make_shared<paddle::experimental::DefaultAllocator>(
          paddle::platform::CPUPlace()),
      dense_meta);
  auto* dense_data = dense->mutable_data<float>();
  dense_data[0] = 5.0f;
  egr::EagerTensor et1(dense);
  std::vector<std::vector<egr::EagerTensor>> grads = {{et1}};

  VLOG(6) << "Test Grad Node Call";
  auto call_result = (*node0)(grads);
  // GradTestNode always answers with a tensor holding 6.0.
  CHECK_EQ(first_value(call_result[0][0]), 6.0f);

  VLOG(6) << "Test Add Edges";
  egr::Edge edge0(node1, 1, 2);
  auto auto_grad0 = std::make_shared<egr::AutogradMeta>(edge0);
  egr::Edge edge1(node1, 3, 4);
  auto auto_grad1 = std::make_shared<egr::AutogradMeta>(edge1);

  // Single-meta overload: the edge lands in slot 0 with rank info (1, 2).
  node0->AddEdges(*auto_grad0, 0);
  CHECK_EQ(node0->GetEdges()[0][0].GetEdgeRankInfo().first,
           static_cast<size_t>(1));
  CHECK_EQ(node0->GetEdges()[0][0].GetEdgeRankInfo().second,
           static_cast<size_t>(2));

  // Vector-of-metas overload: the edge lands in slot 1 with rank info (3, 4).
  std::vector<egr::AutogradMeta*> metas = {auto_grad1.get()};
  node0->AddEdges(metas, 1);
  CHECK_EQ(node0->GetEdges()[1][0].GetEdgeRankInfo().first,
           static_cast<size_t>(3));
  CHECK_EQ(node0->GetEdges()[1][0].GetEdgeRankInfo().second,
           static_cast<size_t>(4));

  VLOG(6) << "Test Set Meta and Get Meta";
  auto_grad1->SetStopGradient(true);
  node0->SetGradInMeta(metas, 0);
  node0->SetGradInMeta(*auto_grad1, 1);
  node0->SetGradOutMeta(metas, 0);
  node0->SetGradOutMeta(*auto_grad1, 1);
  CHECK_EQ(node0->InputMeta()[0].Size(), 1);
  CHECK_EQ(node0->InputMeta()[1].Size(), 1);
  CHECK(node0->OutputMeta()[0].IsStopGradient(0));
  CHECK(node0->OutputMeta()[1].IsStopGradient(0));

  VLOG(6) << "Test Default Set Meta and Get Meta";
  auto node2 = std::make_shared<eager_test::GradTestNode>(
      /* val */ 5.0, /* in_num */ 1, /* out_num */ 1);
  node2->SetDefaultGradInOutMeta();
  CHECK(node2->OutputMeta()[0].IsInitialized());
  CHECK(!node2->OutputMeta()[0].IsStopGradient(0));
  CHECK_EQ(node2->OutputMeta()[0].Size(), 1);

  VLOG(6) << "Test Gradient Hook";
  // The hook returns a fresh 1x1 tensor holding 6.0 plus the first element of
  // its input tensor.
  auto gradient_hook = [](const egr::EagerTensor& et) -> egr::EagerTensor {
    pten::DenseTensorMeta hook_meta = pten::DenseTensorMeta(
        pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 1}));
    auto hook_dense = std::make_shared<pten::DenseTensor>(
        std::make_shared<paddle::experimental::DefaultAllocator>(
            paddle::platform::CPUPlace()),
        hook_meta);
    auto* out_data = hook_dense->mutable_data<float>();
    out_data[0] = 6.0f;
    auto* in_data =
        std::dynamic_pointer_cast<pten::DenseTensor>(et.impl())->data<float>();
    out_data[0] += in_data[0];
    egr::EagerTensor hooked;
    hooked.set_impl(hook_dense);
    VLOG(6) << "Running Gradient Hook";
    return hooked;
  };
  node0->RegisterGradientHook(0, 0, gradient_hook);
  // 5 + 6
  auto grad_hook_res = node0->ApplyGradientHooks(grads);
  CHECK_EQ(first_value(grad_hook_res[0][0]), 11.0);

  VLOG(6) << "Test Reduce Hook";
  // The reduce hook overwrites et1's first element so its execution can be
  // observed after ApplyReduceHooks().
  auto reduce_hook = [&et1]() {
    auto* target = std::dynamic_pointer_cast<pten::DenseTensor>(et1.impl())
                       ->mutable_data<float>();
    target[0] = 100.0;
    VLOG(6) << "Running Reduce Hook";
  };
  node0->RegisterReduceHook(reduce_hook);
  node0->ApplyReduceHooks();
  CHECK_EQ(first_value(et1), 100.0);
}
// Covers egr::Edge: construction (empty, from two indices, from a pair),
// access to the wrapped grad node, and reading/updating the rank info.
TEST(GradNodeInfo, Edge) {
  auto node = std::make_shared<eager_test::GradTestNode>(5, 2, 2);

  VLOG(6) << "Test Construct Edge";
  egr::Edge empty_edge{};
  CHECK(!empty_edge.IsInitialized());
  egr::Edge edge1(node, static_cast<size_t>(0), static_cast<size_t>(0));
  CHECK(edge1.IsInitialized());
  egr::Edge edge2(
      node, std::make_pair(static_cast<size_t>(1), static_cast<size_t>(0)));

  VLOG(6) << "Test Set Edge's Grad Node";
  auto* grad_node = edge1.GetGradNode();
  CHECK_EQ(grad_node->InputMeta().size(), static_cast<size_t>(2));
  auto mutable_node = edge1.GetMutableGradNode();
  auto default_meta = std::make_shared<egr::AutogradMeta>();
  std::vector<egr::AutogradMeta*> metas = {default_meta.get()};
  // A default-constructed AutogradMeta is expected to be recorded as
  // stop-gradient in the input meta, which the check below asserts.
  mutable_node->SetGradInMeta(metas, 0);
  CHECK(grad_node->InputMeta()[0].IsStopGradient(0));

  VLOG(6) << "Test Get/Set Edge Rank Info";
  // Asserts both halves of edge2's current rank info.
  auto expect_rank_info = [&edge2](size_t first, size_t second) {
    CHECK_EQ(edge2.GetEdgeRankInfo().first, first);
    CHECK_EQ(edge2.GetEdgeRankInfo().second, second);
  };
  expect_rank_info(1, 0);
  edge2.SetEdgeRankInfo(2, 3);
  expect_rank_info(2, 3);
  edge2.SetEdgeRankInfo(
      std::make_pair(static_cast<size_t>(4), static_cast<size_t>(5)));
  expect_rank_info(4, 5);
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/pten/api/lib/utils/allocator.h"
namespace eager_test {
// Minimal GradNodeBase subclass for the eager data-structure tests.
//
// Calling the node stores the first float of grads[0][0] in val_ and returns
// one slot containing one 1x1 float CPU tensor whose element is 6.0.
class GradTestNode : public egr::GradNodeBase {
 public:
  ~GradTestNode() override = default;

  // Forwards the slot counts to GradNodeBase and seeds val_ with `val`.
  GradTestNode(float val, int in_num, int out_num)
      : GradNodeBase(in_num, out_num), val_(val) {}

  // Default-constructs the base and seeds val_ with 1.0.
  GradTestNode() : GradNodeBase(), val_(1.0f) {}

  // Expects grads[0][0] to wrap a pten::DenseTensor of floats; neither the
  // indices nor the cast result are checked here.
  std::vector<std::vector<egr::EagerTensor>> operator()(
      const std::vector<std::vector<egr::EagerTensor>>& grads) override {
    // Remember the first value of the incoming gradient.
    auto incoming =
        std::dynamic_pointer_cast<pten::DenseTensor>(grads[0][0].impl());
    val_ = incoming->data<float>()[0];

    // Build the fixed answer: a 1x1 float tensor on CPU holding 6.0.
    pten::DenseTensorMeta out_meta = pten::DenseTensorMeta(
        pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 1}));
    auto out_dense = std::make_shared<pten::DenseTensor>(
        std::make_shared<paddle::experimental::DefaultAllocator>(
            paddle::platform::CPUPlace()),
        out_meta);
    auto* out_data = out_dense->mutable_data<float>();
    out_data[0] = 6.0f;

    egr::EagerTensor out_tensor(out_dense);
    std::vector<std::vector<egr::EagerTensor>> outputs = {{out_tensor}};
    return outputs;
  }

  // Last value captured by operator(), or the constructor-provided seed.
  float val_;
};
} // namespace eager_test
...@@ -484,7 +484,7 @@ class PD_DLL_DECL Tensor final { ...@@ -484,7 +484,7 @@ class PD_DLL_DECL Tensor final {
* Tensor name: used to adapt original execution mechanism and debug analysis * Tensor name: used to adapt original execution mechanism and debug analysis
* in the development of new dygraph. It may be removed in the future. * in the development of new dygraph. It may be removed in the future.
*/ */
std::string name_; std::string name_{""};
}; };
} // namespace experimental } // namespace experimental
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册