提交 e9233d1c 编写于 作者: J Jiabin Yang 提交者: XiaoguangHu

Refactor dygraph (#19107)

* refactor dygraph,test=develop

* fix failed unittest,test=develop

* polish code,test=develop

* check windows ci error,test=develop
try to fix windows ci error by np.allclose,test=develop

* polish vlog and profiler, test=develop

* try to fix preceding ops order,test=develop

* test transformer in windows ci, test=develop

* use python c-api to speed up tracer.trace,test=develop

* test=develop, fix docker with paddle nccl problem

* test=develop, add ut for debug string and gradient_accumulator

* test=develop, add tests for layer/gradient_accumulator/prepared_op

* test=develop, fix compile error for test_prepared_op

* test=develop, add more ut for dygraph

* test=develop, create API.spec for dygraph api change

* test=develop, refactor name to make it easier to understand

* test=develop, refactor name to make it easier to understand

* test=develop, fix multi-gpu failed problem , add Tracer tests, change PADDLEENFORCE to PADDLEENFORCE_EQ

* test=develop, fix ut failed on parallel se-resnext

* test=develop, change one more PADDLE_ENFORCE
上级 dca9b6c5
......@@ -820,11 +820,11 @@ paddle.fluid.dygraph.TreeConv.state_dict (ArgSpec(args=['self', 'destination', '
paddle.fluid.dygraph.TreeConv.sublayers (ArgSpec(args=['self', 'include_sublayers'], varargs=None, keywords=None, defaults=(True,)), ('document', '00a881005ecbc96578faf94513bf0d62'))
paddle.fluid.dygraph.TreeConv.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.Tracer ('paddle.fluid.dygraph.tracer.Tracer', ('document', '28d72409112111274c33e1f07229d5da'))
paddle.fluid.dygraph.Tracer.__init__ (ArgSpec(args=['self', 'block'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.Tracer.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.Tracer.all_parameters (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.Tracer.eval_mode (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.Tracer.trace 1. trace(self: paddle.fluid.core_avx.Tracer, arg0: paddle.fluid.core_avx.OpBase, arg1: Dict[unicode, handle], arg2: Dict[unicode, handle], arg3: Dict[unicode, Variant], arg4: paddle::platform::CPUPlace, arg5: bool) -> None 2. trace(self: paddle.fluid.core_avx.Tracer, arg0: paddle.fluid.core_avx.OpBase, arg1: Dict[unicode, handle], arg2: Dict[unicode, handle], arg3: Dict[unicode, Variant], arg4: paddle::platform::CUDAPlace, arg5: bool) -> None
paddle.fluid.dygraph.Tracer.trace_op (ArgSpec(args=['self', 'op', 'inputs', 'outputs', 'stop_gradient'], varargs=None, keywords=None, defaults=(False,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.Tracer.trace 1. trace(self: paddle.fluid.core_avx.Tracer, arg0: unicode, arg1: Dict[unicode, handle], arg2: Dict[unicode, handle], arg3: Dict[unicode, Variant], arg4: paddle::platform::CUDAPlace, arg5: bool) -> None 2. trace(self: paddle.fluid.core_avx.Tracer, arg0: unicode, arg1: Dict[unicode, handle], arg2: Dict[unicode, handle], arg3: Dict[unicode, Variant], arg4: paddle::platform::CPUPlace, arg5: bool) -> None
paddle.fluid.dygraph.Tracer.trace_op (ArgSpec(args=['self', 'type', 'inputs', 'outputs', 'attrs', 'stop_gradient'], varargs=None, keywords=None, defaults=(False,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.Tracer.trace_var (ArgSpec(args=['self', 'name', 'var'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.Tracer.train_mode (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.dygraph.prepare_context (ArgSpec(args=['strategy'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
......
cc_library(imperative_flag SRCS flags.cc DEPS gflags)
if(WITH_PYTHON)
cc_library(layer SRCS layer.cc DEPS proto_desc operator device_context blas pybind profiler imperative_flag)
cc_library(tracer SRCS tracer.cc DEPS proto_desc device_context pybind profiler)
cc_library(engine SRCS engine.cc)
cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator device_context lod_tensor selected_rows var_type_traits)
cc_library(layer SRCS layer.cc DEPS prepared_operator math_function imperative_flag variable_helper op_registry)
cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows var_type_traits layer)
cc_library(tracer SRCS tracer.cc DEPS layer engine)
cc_library(engine SRCS engine.cc DEPS layer gradient_accumulator)
cc_library(imperative_profiler SRCS profiler.cc)
cc_library(nccl_context SRCS nccl_context.cc DEPS device_context)
cc_test(nccl_context_test SRCS nccl_context_test.cc DEPS nccl_context)
endif()
add_subdirectory(tests)
......@@ -16,17 +16,12 @@
// Created by Jiabin on 2019-04-25.
//
#pragma once
#ifndef PADDLE_BACKWARDSTRATEGY_H
#define PADDLE_BACKWARDSTRATEGY_H
#endif // PADDLE_BACKWARDSTRATEGY_H
namespace paddle {
namespace imperative {
namespace detail {
class BackwardStrategy {
public:
struct BackwardStrategy {
/* DyGraph now support two kinds of backward strategy, one is sorted sum
* gradient, another is sum gradient once they are created */
// TODO(jiabin): add more Strategy when we support
......
......@@ -14,40 +14,219 @@
#include "paddle/fluid/imperative/engine.h"
#include <mutex> // NOLINT
#include <algorithm>
#include <memory>
#include <queue>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "glog/logging.h"
#include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle {
namespace imperative {
static std::once_flag init_engine;
static Engine* engine;
// Run a single grad op, wrapped in a profiler event named after the op type
// so each backward op shows up individually in the profiler timeline.
// NOTE(review): `place` is not used in this body — presumably kept for
// interface symmetry with callers; confirm before removing.
void Engine::RunOp(paddle::imperative::OpBase* op,
                   const paddle::imperative::NameVarBaseMap& ins,
                   const paddle::imperative::NameVarBaseMap& outs,
                   const paddle::platform::Place& place) {
  platform::RecordEvent event(op->Type());
  op->Run(ins, outs);
}
class DummyEngine : public Engine {
public:
void Enqueue(Runnable* runnable) override {
queued_runnables_.push_back(runnable);
void BasicEngine::Init(VarBase* var, const detail::BackwardStrategy& strategy) {
backward_strategy_ = strategy;
const std::vector<OpBase*> ops = var->GradVarBase()->GradOps();
var->ClearGradOps();
if (ops.empty()) {
VLOG(3) << "Skip auto grad since there is no grad op for var: "
<< var->Name();
return;
} else {
bool valid = false;
for (const auto& op : ops) {
if (op) {
valid = true;
}
}
if (!valid) {
VLOG(3) << "Skip auto grad since all grad op of start VarBase is nullptr";
return;
}
}
init_ops_ = ops;
platform::RecordEvent record_event("Imperative Backward");
VLOG(3) << "start backward";
PADDLE_ENFORCE_EQ(var->HasGradVar(), true,
"Grad variable not exist for variable %s", var->Name());
size_t Size() const override { return queued_runnables_.size(); }
auto& fwd_var = var->Var().Get<framework::LoDTensor>();
auto* grad_var =
var->GradVarBase()->MutableVar()->GetMutable<framework::LoDTensor>();
auto* dev_ctx = platform::DeviceContextPool::Instance().Get(fwd_var.place());
grad_var->Resize(fwd_var.dims());
grad_var->mutable_data(fwd_var.place(), fwd_var.type());
operators::math::set_constant(*dev_ctx, grad_var, 1.0);
}
void Sync() override {
for (Runnable* l : queued_runnables_) {
LOG(INFO) << "running " << reinterpret_cast<void*>(l);
bool BasicEngine::CheckBackwardInputs(OpBase* op) {
for (auto& pair : op->GetInsMap()) {
for (auto& var : pair.second) {
if (var && !var->StopGradient()) {
return true;
}
}
queued_runnables_.clear();
}
return false;
}
// Create (or reuse) a GradientAccumulator for every output variable of `op`
// and bump its reference count, so Execute() knows how many gradient
// contributions each variable expects before its grad is complete.
void BasicEngine::PrepareGradAccumulators(OpBase* op) {
  for (const auto& pair : op->GetOutsMap()) {
    for (const auto& var : pair.second) {
      if (!var) continue;
      auto& accumulator = accumulators_[var.get()];
      if (!accumulator) {
        // The backward strategy selects the accumulation flavor: sorted
        // (deterministic, ordered by trace id) vs eager (as grads arrive).
        if (backward_strategy_.sorted_sum_gradient_) {
          accumulator.reset(new SortedGradientAccumulator(var.get()));
        } else {
          accumulator.reset(new EagerGradientAccumulator(var.get()));
        }
      }
      accumulator->IncreaseRefCnt();
      // Fixed log message: "acccumulate" typo and the missing space before
      // "with", which produced output like "...gradXwith reference count...".
      VLOG(3) << "Prepare to accumulate variable grad " << var->Name()
              << " with reference count " << accumulator->RefCnt();
    }
  }
}
void BasicEngine::PrepareDeps() {
PADDLE_ENFORCE_EQ(op_deps_.empty(), true, "Op deps must be initialized here");
PADDLE_ENFORCE_EQ(accumulators_.empty(), true,
"Accumulators must be initialized here");
std::queue<OpBase*> q;
std::unordered_set<OpBase*> visited;
for (const auto& init_op : init_ops_) {
q.push(init_op);
visited.insert(init_op);
}
private:
std::vector<Runnable*> queued_runnables_;
};
while (!q.empty()) {
auto* cur_op = q.front();
q.pop();
VLOG(3) << "Checking grads of op " << cur_op->Type();
Engine* GetEngine() {
std::call_once(init_engine, []() { engine = new DummyEngine(); });
return engine;
if (!CheckBackwardInputs(cur_op)) {
// TODO(zjl): clear ops that do not need grad before running autograd
VLOG(3) << "Stop checking preceding ops of " << cur_op->Type()
<< " because all of its backward inputs is stop_gradient=True";
continue;
}
PrepareGradAccumulators(cur_op);
auto& preceding_ops = cur_op->GradPendingOps();
for (auto* preceding_op : preceding_ops) {
PADDLE_ENFORCE_NOT_NULL(preceding_op);
++op_deps_[preceding_op];
if (visited.count(preceding_op) == 0) {
visited.insert(preceding_op);
q.push(preceding_op);
}
}
}
}
// Feed one gradient contribution for `dst` into the accumulator that
// PrepareGradAccumulators() registered for it; `op->id()` serves as the
// trace id used by sorted accumulation.
void BasicEngine::SumGradient(OpBase* op, std::shared_ptr<VarBase> src,
                              VarBase* dst) {
  auto found = accumulators_.find(dst);
  PADDLE_ENFORCE_EQ(found != accumulators_.end(), true,
                    "Cannot find gradient of variable %s", dst->Name());
  auto& accumulator = found->second;
  accumulator->Add(std::move(src), op->id());
}
// Run the backward pass over the recorded grad-op graph. Uses a
// dependency-counting traversal: an op is enqueued only when every op
// that depends on it (per op_deps_) has already run.
void BasicEngine::Execute() {
  PrepareDeps();
  // Start execute Computation graph
  std::queue<OpBase*> q;
  for (const auto& init_op : init_ops_) {
    q.push(init_op);
  }
  while (!q.empty()) {
    OpBase* cur_op = q.front();
    q.pop();
    // Step 1: Run Backward
    auto& bwd_ins = cur_op->GetInsMap();
    auto& bwd_outs = cur_op->GetOutsMap();
    // Grads are first written into fresh temporary vars (tmp_outs); they are
    // merged into the real grad vars in Step 2 through the accumulators.
    NameVarBaseMap tmp_outs;
    // A var may be corresponding to several grad var in one op
    std::unordered_map<VarBase*, std::vector<std::shared_ptr<VarBase>>> var_map;
    size_t counter = 0;
    for (auto& bwd_out : bwd_outs) {
      auto& tmp_var_list = tmp_outs[bwd_out.first];
      tmp_var_list.reserve(bwd_out.second.size());
      for (auto& var : bwd_out.second) {
        auto tmp_var = std::make_shared<VarBase>(
            false, "Gtmp@" + std::to_string(counter++));  // Do not need grad
        tmp_var_list.emplace_back(tmp_var);
        if (var) {
          var_map[var.get()].emplace_back(std::move(tmp_var));
          var->ClearGradOps();
        }
      }
    }
    VLOG(3) << "Start to execute grad op " << cur_op->Type();
    RunOp(cur_op, bwd_ins, tmp_outs, cur_op->place());
    // Step 2: Sum Gradient
    {
      platform::RecordEvent record_event("merge_grads");
      for (auto& var_pair : var_map) {
        auto* dst_var = var_pair.first;
        if (dst_var == nullptr) continue;
        for (auto& src_var : var_pair.second) {
          VLOG(3) << "Sum gradient of variable " << dst_var->Name()
                  << " after op " << cur_op->Type();
          SumGradient(cur_op, std::move(src_var), dst_var);
        }
      }
    }
    // Step 3: Collect ready ops
    for (auto* preceding_op : cur_op->GradPendingOps()) {
      PADDLE_ENFORCE_NOT_NULL(preceding_op);
      // Ops pruned by PrepareDeps (all inputs stop_gradient) have no entry
      // in op_deps_ and are skipped here.
      auto iter = op_deps_.find(preceding_op);
      if (iter == op_deps_.end()) {
        continue;
      }
      VLOG(3) << "Found preceding op of " << cur_op->Type();
      // An Op is ready to go while its deps comes to zero
      if (--(iter->second) == 0) {
        q.push(preceding_op);
        VLOG(3) << "Push preceding op " << preceding_op->Type()
                << " into queue";
      }
    }
    // Step 4: Delete op to collect unused variables
    VLOG(3) << "Remove op after op " << cur_op->Type() << " runs";
    RemoveOp(cur_op);
  }
  VLOG(3) << "Clean properties of BasicEngine";
  CleanEngine();
}
} // namespace imperative
} // namespace paddle
......@@ -16,24 +16,80 @@
#include <cstddef>
#include <cstdint>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/imperative/backward_strategy.h"
#include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/fluid/imperative/layer.h"
namespace paddle {
namespace imperative {
struct Runnable {};
// It seems there is no need for Engine to be a
// singleton; we can have multiple engines to run
// multiple graphs. For future use we may expose an interface
// to Python to support
class Engine {
public:
virtual ~Engine() {}
virtual ~Engine() = default;
virtual void Execute() = 0;
virtual void Init(VarBase* var, const detail::BackwardStrategy& strategy) = 0;
virtual void RunOp(imperative::OpBase* op, const NameVarBaseMap& ins,
const NameVarBaseMap& outs, const platform::Place& place);
virtual void Enqueue(Runnable* runnable) = 0;
virtual void RemoveOp(OpBase* op) {
PADDLE_ENFORCE_NOT_NULL(op, "Cannot remove null op");
auto iter = grad_ops_.find(op);
PADDLE_ENFORCE_EQ(iter != grad_ops_.end(), true, "Op is not inside tracer");
grad_ops_.erase(iter);
}
virtual size_t Size() const = 0;
void InsertOp(OpBase* op, std::shared_ptr<OpBase> op_shared) {
grad_ops_[op] = std::move(op_shared);
}
void Clear() { grad_ops_.clear(); }
virtual void Sync() = 0;
private:
std::unordered_map<OpBase*, std::shared_ptr<OpBase>>
grad_ops_; // opBase for remove - grad_op
};
Engine* GetEngine();
// Default autograd engine: dependency-counted traversal of the grad-op
// graph, starting from the ops registered via Init().
class BasicEngine : public Engine {
 public:
  BasicEngine() = default;
  // Seed the backward pass from `var`: collect its grad ops and fill the
  // initial gradient.
  void Init(VarBase* var, const detail::BackwardStrategy& strategy) override;
  ~BasicEngine() override = default;
  // Run the whole backward pass (see engine.cc).
  void Execute() override;

 private:
  // Fill op_deps_ and the gradient accumulators by walking the graph from
  // init_ops_.
  void PrepareDeps();
  // True when at least one backward input of `op` still requires gradient.
  bool CheckBackwardInputs(OpBase* op);
  // Register/refcount an accumulator for every grad output var of `op`.
  void PrepareGradAccumulators(OpBase* op);
  // Merge one gradient contribution into `dst` via its accumulator.
  void SumGradient(OpBase* op, std::shared_ptr<VarBase> src, VarBase* dst);
  // TODO(jiabin): maybe we can optimize the performance of engine by cache the
  // result
  void CleanEngine() {
    init_ops_.clear();
    op_deps_.clear();
    accumulators_.clear();
    Clear();
  }
  // Grad ops of the variable backward() was started from.
  std::vector<OpBase*> init_ops_;
  detail::BackwardStrategy backward_strategy_;
  // Remaining dependency count per grad op; an op runs when it reaches zero.
  std::unordered_map<OpBase*, size_t> op_deps_;
  // One accumulator per gradient variable, merging multiple contributions.
  std::unordered_map<VarBase*, std::unique_ptr<GradientAccumulator>>
      accumulators_;
};
} // namespace imperative
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/imperative/gradient_accumulator.h"
#include <algorithm>
#include <memory>
#include <utility>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle {
namespace imperative {
// boost::static_visitor that performs y += x (AXPY with alpha = 1.0) on the
// device selected by the visited place. x_ and y_ must both hold numel_
// elements and live on the visited place.
template <typename T>
class TensorAddFunctor : public boost::static_visitor<> {
 public:
  TensorAddFunctor(int64_t numel, const T* x, T* y)
      : numel_(numel), x_(x), y_(y) {}
  void operator()(const platform::CPUPlace& place) {
    platform::CPUDeviceContext* ctx = dynamic_cast<platform::CPUDeviceContext*>(
        platform::DeviceContextPool::Instance().Get(place));
    auto blas = operators::math::GetBlas<platform::CPUDeviceContext, T>(*ctx);
    blas.AXPY(numel_, 1., x_, y_);
  }
#ifdef PADDLE_WITH_CUDA
  void operator()(const platform::CUDAPlace& place) {
    platform::CUDADeviceContext* ctx =
        dynamic_cast<platform::CUDADeviceContext*>(
            platform::DeviceContextPool::Instance().Get(place));
    auto blas = operators::math::GetBlas<platform::CUDADeviceContext, T>(*ctx);
    blas.AXPY(numel_, 1., x_, y_);
  }
#else
  // CUDA support not compiled in: a CUDAPlace gradient cannot be merged.
  void operator()(const platform::CUDAPlace& place) {
    PADDLE_THROW("Do NOT support gradient merge in place %s", place);
  }
#endif
  // there is NO blas in CUDAPinnedPlace
  void operator()(const platform::CUDAPinnedPlace& place) {
    PADDLE_THROW("Do NOT support gradient merge in place %s", place);
  }

 private:
  int64_t numel_;  // element count of both buffers
  const T* x_;     // source buffer (not owned)
  T* y_;           // destination buffer, updated in place (not owned)
};
// Element-wise add the LoDTensor in `src` into the one in `dst`
// (dst += src). Supports float and double; throws for other dtypes.
void TensorAdd(const framework::Variable& src, framework::Variable* dst) {
  auto* dst_tensor = dst->GetMutable<framework::LoDTensor>();
  auto& src_tensor = src.Get<framework::LoDTensor>();
  auto numel = src_tensor.numel();
  // FIXME(minqiyang): loss_grad op will pass a zero grad of label
  // ugly fix for it
  if (numel == 0) {
    return;
  }
  // Compare the numels directly (instead of `a == b` vs `true`) so the
  // enforce reports the two mismatching values itself.
  PADDLE_ENFORCE_EQ(dst_tensor->numel(), numel,
                    "dst_numel %d vs. src_numel %d", dst_tensor->numel(),
                    numel);
  auto data_type = src_tensor.type();
  auto place = src_tensor.place();
#define PADDLE_TENSOR_ADD_MACRO(cpp_type)                            \
  if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
    TensorAddFunctor<cpp_type> func(                                 \
        numel, src_tensor.data<cpp_type>(),                          \
        dst_tensor->mutable_data<cpp_type>(place));                  \
    boost::apply_visitor(func, place);                               \
    return;                                                          \
  }
  PADDLE_TENSOR_ADD_MACRO(float);
  PADDLE_TENSOR_ADD_MACRO(double);
#undef PADDLE_TENSOR_ADD_MACRO
  PADDLE_THROW("Not supported data type %s for AddTo",
               framework::DataTypeToString(data_type));
}
// Accumulate one gradient contribution immediately: the first contribution
// is moved into the target variable, later ones are summed in with
// TensorAdd. `trace_id` is unused by the eager strategy.
void EagerGradientAccumulator::Add(std::shared_ptr<VarBase> var,
                                   size_t trace_id) {
  auto* dst_var = var_->MutableVar();
  if (cur_cnt_ != 0) {
    TensorAdd(var->Var(), dst_var);
  } else {
    *dst_var = std::move(*(var->MutableVar()));
  }
  ++cur_cnt_;
}
// Accumulate one gradient contribution, deferring the actual sum until all
// ref_cnt_ contributions have been buffered; they are then merged in
// descending trace_id order so the summation order is deterministic.
void SortedGradientAccumulator::Add(std::shared_ptr<VarBase> var,
                                    size_t trace_id) {
  auto* dst_var = var_->MutableVar();
  if (ref_cnt_ == 1) {
    // Single contribution expected: move it in directly, nothing to sort.
    *dst_var = std::move(*(var->MutableVar()));
  } else {
    if (tmp_grad_vars_.empty()) {
      tmp_grad_vars_.reserve(ref_cnt_);
    }
    tmp_grad_vars_.emplace_back(std::move(var), trace_id);
    // Wait until every expected contribution has arrived.
    if (tmp_grad_vars_.size() != ref_cnt_) {
      return;
    }
    // Sort by trace id, largest first.
    std::sort(tmp_grad_vars_.begin(), tmp_grad_vars_.end(),
              [](const std::pair<std::shared_ptr<VarBase>, size_t>& p1,
                 const std::pair<std::shared_ptr<VarBase>, size_t>& p2) {
                return p1.second > p2.second;
              });
    // Move the first buffered grad in, then add the rest onto it.
    *dst_var = std::move(*(tmp_grad_vars_[0].first->MutableVar()));
    for (size_t i = 1; i < tmp_grad_vars_.size(); ++i) {
      TensorAdd(tmp_grad_vars_[i].first->Var(), dst_var);
    }
    tmp_grad_vars_.clear();
  }
}
} // namespace imperative
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <utility>
#include <vector>
#include "paddle/fluid/imperative/layer.h"
namespace paddle {
namespace imperative {
// Base class for merging multiple gradient contributions into one target
// variable. ref_cnt_ records how many contributions are expected (one
// IncreaseRefCnt() call per expected contribution).
class GradientAccumulator {
 public:
  explicit GradientAccumulator(VarBase* var) : var_(var) {}
  // Add one gradient contribution; `trace_id` identifies the producing op
  // (used by the sorted strategy to order the summation).
  virtual void Add(std::shared_ptr<VarBase> var, size_t trace_id) = 0;
  virtual ~GradientAccumulator() = default;
  inline void IncreaseRefCnt() { ++ref_cnt_; }
  inline size_t RefCnt() const { return ref_cnt_; }

 protected:
  VarBase* var_;  // accumulation target (not owned)
  size_t ref_cnt_{0};
};
// Accumulates gradients as soon as they arrive (arrival order, first one
// moved in, later ones summed).
class EagerGradientAccumulator : public GradientAccumulator {
 public:
  using GradientAccumulator::GradientAccumulator;
  void Add(std::shared_ptr<VarBase> var, size_t trace_id) override;

 private:
  size_t cur_cnt_{0};  // contributions received so far
};
// Buffers all contributions, then sums them in an order determined by
// trace id — deterministic across runs regardless of arrival order.
class SortedGradientAccumulator : public GradientAccumulator {
 public:
  using GradientAccumulator::GradientAccumulator;
  void Add(std::shared_ptr<VarBase> var, size_t trace_id) override;

 private:
  // Buffered (grad var, trace id) pairs awaiting the final merge.
  std::vector<std::pair<std::shared_ptr<VarBase>, size_t>> tmp_grad_vars_;
};
} // namespace imperative
} // namespace paddle
此差异已折叠。
此差异已折叠。
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/imperative/prepared_operator.h"
#include <sstream>
namespace paddle {
namespace imperative {
// Return the tensor stored in `var` — the tensor itself for LoDTensor, the
// value tensor for SelectedRows — or nullptr for any other variable type.
const framework::Tensor* GetTensorFromVar(const framework::Variable& var) {
  if (var.IsType<framework::LoDTensor>()) {
    return &var.Get<framework::LoDTensor>();
  }
  if (var.IsType<framework::SelectedRows>()) {
    return &var.Get<framework::SelectedRows>().value();
  }
  return nullptr;
}
// Check that the initialized input tensors agree with `place` and return it.
// Bug fix: the original never set `found` to true, so the condition
// `!found || tmp_place == place` was always satisfied and the consistency
// enforce could never fire. We now mark `found` after the first initialized
// tensor, activating the check for subsequent inputs.
platform::Place PreparedOp::GetExpectedPlace(const platform::Place& place,
                                             const NameVarBaseMap& ins) {
  bool found = false;
  for (auto& name_pair : ins) {
    for (auto& var_base : name_pair.second) {
      // Uninitialized tensors carry no meaningful place; skip them.
      const auto* tensor = GetTensorFromVar(var_base->Var());
      if (tensor && tensor->IsInitialized()) {
        auto tmp_place = tensor->place();
        PADDLE_ENFORCE_EQ(!found || tmp_place == place, true,
                          "Input variable should keep in the same place: %s, "
                          "but get place: %s of input %s instead",
                          place, tmp_place, name_pair.first);
        found = true;
      }
    }
  }
  return place;
}
// Bundle everything needed to launch the selected kernel later via Run().
// Stores references to `op` and `ctx` (see the header members), so both
// must outlive this PreparedOp.
PreparedOp::PreparedOp(const framework::OperatorBase& op,
                       const framework::RuntimeContext& ctx,
                       framework::OperatorWithKernel::OpKernelFunc func,
                       platform::DeviceContext* dev_ctx,
                       std::vector<framework::KernelConfig>* kernel_configs)
    : op_(op),
      ctx_(ctx),
      func_(std::move(func)),
      dev_ctx_(dev_ctx),
      kernel_configs_(kernel_configs) {}
// Select the kernel of `op` matching its expected kernel type on `place`
// and wrap it, together with its device context and kernel configs, into a
// PreparedOp. Throws when the op has no registered kernels at all, or when
// no registered kernel matches the expected kernel key.
PreparedOp PreparedOp::Prepare(const framework::RuntimeContext& ctx,
                               const framework::OperatorWithKernel& op,
                               const platform::Place& place) {
  auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place);
  // check if op[type] has kernel registered.
  auto& all_op_kernels = op.AllOpKernels();
  auto kernels_iter = all_op_kernels.find(op.Type());
  if (kernels_iter == all_op_kernels.end()) {
    PADDLE_THROW(
        "There are no kernels which are registered in the %s operator.",
        op.Type());
  }
  auto& kernels = kernels_iter->second;
  // A throwaway empty Scope is passed only to satisfy the ExecutionContext
  // interface (cf. the TODO in Run() about removing scope in dygraph).
  auto expected_kernel_key =
      op.GetExpectedKernelType(framework::ExecutionContext(
          op, framework::Scope(), *dev_ctx, ctx, nullptr));
  VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
  auto kernel_iter = kernels.find(expected_kernel_key);
  // TODO(jiabin): Add operator.cc's line 1000 part back when we need that case
  if (kernel_iter == kernels.end()) {
    PADDLE_THROW("op %s does not have kernel for %s", op.Type(),
                 KernelTypeToString(expected_kernel_key));
  }
  std::vector<framework::KernelConfig>* kernel_configs =
      op.GetKernelConfig(expected_kernel_key);
  return PreparedOp(op, ctx, kernel_iter->second, dev_ctx, kernel_configs);
}
// Launch the prepared kernel: first infer output shapes at runtime, then
// invoke the kernel functor with a freshly built ExecutionContext.
void PreparedOp::Run() {
  // TODO(zjl): remove scope in dygraph
  framework::Scope scope;
  op_.RuntimeInferShape(scope, dev_ctx_->GetPlace(), ctx_);
  func_(framework::ExecutionContext(op_, scope, *dev_ctx_, ctx_,
                                    kernel_configs_));
}
} // namespace imperative
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/type_defs.h"
namespace paddle {
namespace imperative {
const framework::Tensor* GetTensorFromVar(const framework::Variable& var);
// An operator whose kernel has already been selected for a concrete place
// and is ready to be launched via Run(). Instances are created through the
// static Prepare() factory; the constructor is private.
class PreparedOp {
 public:
  // Select the kernel of `op` matching its expected kernel type on `place`.
  static PreparedOp Prepare(const framework::RuntimeContext& ctx,
                            const framework::OperatorWithKernel& op,
                            const platform::Place& place);
  inline platform::DeviceContext* GetDeviceContext() const { return dev_ctx_; }
  // Run RuntimeInferShape and then the selected kernel.
  void Run();
  // Validate input tensor places against `place` and return it.
  static platform::Place GetExpectedPlace(const platform::Place& place,
                                          const NameVarBaseMap& ins);

 private:
  PreparedOp(const framework::OperatorBase& op,
             const framework::RuntimeContext& ctx,
             framework::OperatorWithKernel::OpKernelFunc func,
             platform::DeviceContext* dev_ctx,
             std::vector<framework::KernelConfig>* kernel_configs);

 private:
  // References: the op and runtime context must outlive this object.
  const framework::OperatorBase& op_;
  const framework::RuntimeContext& ctx_;
  framework::OperatorWithKernel::OpKernelFunc func_;
  platform::DeviceContext* dev_ctx_;   // not owned
  std::vector<framework::KernelConfig>* kernel_configs_;  // not owned
};
} // namespace imperative
} // namespace paddle
cc_test(nccl_context_test SRCS nccl_context_test.cc DEPS nccl_context)
cc_test(test_gradient_accmulator SRCS test_gradient_accmulator.cc DEPS gradient_accumulator memcpy)
cc_test(test_layer SRCS test_layer.cc DEPS layer proto_desc operator op_registry variable_helper mul_op)
cc_test(test_prepare_op SRCS test_prepare_op.cc DEPS prepared_operator op_info split_op layer concat_and_split)
cc_test(test_tracer SRCS test_tracer.cc DEPS tracer layer proto_desc operator op_registry variable_helper mul_op)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/fluid/memory/memcpy.h"
namespace imperative = paddle::imperative;
namespace platform = paddle::platform;
namespace framework = paddle::framework;
namespace paddle {
namespace imperative {
void TensorAdd(const framework::Variable& src, framework::Variable* dst);
#if defined(PADDLE_WITH_CUDA)
// Copy two host vectors into GPU LoDTensors, run TensorAdd, copy the result
// back to host, and compare element-wise against the expected sums.
// Returns 0 on success, 1 on any mismatch.
template <typename T>
int TensorGPUAddTest(platform::CUDAPlace place, T t1, T t2) {
  framework::Variable var1;
  framework::Variable var2;
  std::vector<T> src_data(10, t1);
  std::vector<T> dst_data(10, t2);
  std::vector<T> result;
  platform::CPUPlace src_place;
  for (unsigned int i = 0; i < 10; i++) {
    result.emplace_back(src_data[i] + dst_data[i]);
  }
  std::vector<int64_t> dims = {2, 5};
  auto* src = var1.GetMutable<framework::LoDTensor>();
  auto* dst = var2.GetMutable<framework::LoDTensor>();
  src->Resize(framework::make_ddim(dims));
  dst->Resize(framework::make_ddim(dims));
  auto* src_mutable = src->mutable_data<T>(place);
  auto* dst_mutable = dst->mutable_data<T>(place);
  paddle::memory::Copy(place, src_mutable, src_place, src_data.data(),
                       sizeof(T) * src_data.size(), 0);
  paddle::memory::Copy(place, dst_mutable, src_place, dst_data.data(),
                       sizeof(T) * dst_data.size(), 0);
  imperative::TensorAdd(var1, &var2);
  framework::LoDTensor rlt;
  platform::CPUPlace rlt_place;
  framework::TensorCopySync(*dst, rlt_place, &rlt);
  // numel() returns int64_t; use a matching index type to avoid a
  // signed/unsigned comparison.
  for (int64_t i = 0; i < rlt.numel(); i++) {
    if (rlt.data<T>()[i] != result[i]) return 1;
  }
  return 0;
}
#endif
// CPU counterpart of TensorGPUAddTest: fill two CPU LoDTensors, run
// TensorAdd, and compare the result element-wise against the expected sums.
// Returns 0 on success, 1 on any mismatch.
template <typename T>
int TensorCPUAddTest(platform::CPUPlace place, T t1, T t2) {
  framework::Variable var1;
  framework::Variable var2;
  std::vector<T> src_data(10, t1);
  std::vector<T> dst_data(10, t2);
  std::vector<T> result;
  platform::CPUPlace src_place;
  for (unsigned int i = 0; i < 10; i++) {
    result.emplace_back(src_data[i] + dst_data[i]);
  }
  std::vector<int64_t> dims = {2, 5};
  auto* src = var1.GetMutable<framework::LoDTensor>();
  auto* dst = var2.GetMutable<framework::LoDTensor>();
  src->Resize(framework::make_ddim(dims));
  dst->Resize(framework::make_ddim(dims));
  auto* src_mutable = src->mutable_data<T>(place);
  auto* dst_mutable = dst->mutable_data<T>(place);
  paddle::memory::Copy(place, src_mutable, src_place, src_data.data(),
                       sizeof(T) * src_data.size());
  paddle::memory::Copy(place, dst_mutable, src_place, dst_data.data(),
                       sizeof(T) * dst_data.size());
  imperative::TensorAdd(var1, &var2);
  framework::LoDTensor rlt;
  platform::CPUPlace rlt_place;
  framework::TensorCopySync(*dst, rlt_place, &rlt);
  // numel() returns int64_t; use a matching index type to avoid a
  // signed/unsigned comparison.
  for (int64_t i = 0; i < rlt.numel(); i++) {
    if (rlt.data<T>()[i] != result[i]) return 1;
  }
  return 0;
}
// Exercise TensorAdd for float and double on CPU, and additionally on GPU
// when built with CUDA support.
TEST(test_add_functor, add_functor) {
#if defined(PADDLE_WITH_CUDA)
  platform::CUDAPlace gpu_place(0);
#endif
  platform::CPUPlace cpu_place;
  // Helpers return 0 on success, 1 on mismatch.
  int cpu_res = 1;
  cpu_res = TensorCPUAddTest(cpu_place, 1.0, 0.0);
  EXPECT_EQ(cpu_res, 0);
  cpu_res = TensorCPUAddTest(cpu_place, static_cast<double>(1.0),
                             static_cast<double>(2.0));
  EXPECT_EQ(cpu_res, 0);
#if defined(PADDLE_WITH_CUDA)
  int gpu_res = 1;
  gpu_res = TensorGPUAddTest(gpu_place, 1.0, 0.0);
  EXPECT_EQ(gpu_res, 0);
  gpu_res = TensorGPUAddTest(gpu_place, static_cast<double>(1.0),
                             static_cast<double>(2.0));
  EXPECT_EQ(gpu_res, 0);
#endif
}
} // namespace imperative
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Created by Jiabin on 2019-08-16.
//
#include <paddle/fluid/framework/op_registry.h>
#include <memory>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/imperative/layer.h"
namespace imperative = paddle::imperative;
namespace platform = paddle::platform;
namespace framework = paddle::framework;
namespace paddle {
namespace imperative {
using vb_vector = std::vector<std::shared_ptr<imperative::VarBase>>;
using var_pair = std::pair<std::string, vb_vector>;
// Exercise RuntimeInferVarTypeContext lookups (HasVar/HasInput/HasOutput)
// and verify that the shape/dtype/LoD APIs throw in dygraph mode.
// Fix: the original leaked the context by `new`-ing it without delete;
// construct it on the stack instead.
TEST(test_layer, test_runtime_context) {
  std::shared_ptr<imperative::VarBase> vin(
      new imperative::VarBase(false, "vin"));
  std::shared_ptr<imperative::VarBase> vout(
      new imperative::VarBase(false, "vout"));
  var_pair in_pair = var_pair("X", vb_vector(1, vin));
  var_pair out_pair = var_pair("Out", vb_vector(1, vout));
  imperative::NameVarBaseMap ins = {in_pair};
  imperative::NameVarBaseMap outs = {out_pair};
  framework::AttributeMap attrs;
  imperative::RuntimeInferVarTypeContext ctx(ins, &outs, attrs);
  ASSERT_TRUE(ctx.HasVar("vin"));
  ASSERT_TRUE(ctx.HasInput("X"));
  ASSERT_TRUE(ctx.HasOutput("Out"));
  ASSERT_ANY_THROW(ctx.GetDataTypes("vin"));
  std::vector<framework::proto::VarType::Type> NullType;
  ASSERT_ANY_THROW(ctx.SetDataTypes("vin", NullType));
  ASSERT_ANY_THROW(ctx.GetShape("vin"));
  ASSERT_ANY_THROW(ctx.GetLoDLevel("vin"));
  ASSERT_ANY_THROW(ctx.SetLoDLevel("vin", 2));
}
std::string LayerDebugString(const std::string& op_type,
const NameVarBaseMap& ins,
const NameVarBaseMap& outs);
TEST(test_layer, test_debug_string_test_debug_Test) {
  // Feeds LayerDebugString a mix of healthy and deliberately broken vars
  // (a var holding an uninitialized LoDTensor, a null entry, and a
  // SelectedRows output) and checks the debug text flags each case.
  auto good_in = std::make_shared<imperative::VarBase>(false, "vin");
  auto bad_in = std::make_shared<imperative::VarBase>(false, "vin_error");
  auto good_out = std::make_shared<imperative::VarBase>(false, "vout");
  auto bad_out = std::make_shared<imperative::VarBase>(false, "vout_error");
  // bad_in gets a LoDTensor slot but no data; presumably this is what the
  // "NOT_INITED" marker below reports — confirm against LayerDebugString.
  bad_in->MutableVar()->GetMutable<framework::LoDTensor>();
  good_out->MutableVar()->GetMutable<framework::LoDTensor>();
  bad_out->MutableVar()->GetMutable<framework::SelectedRows>();
  imperative::NameVarBaseMap ins = {{"X", {good_in}}};
  imperative::NameVarBaseMap ins_error = {{"X", {bad_in, nullptr}}};
  imperative::NameVarBaseMap outs = {{"Out", {good_out}}};
  imperative::NameVarBaseMap outs_error = {{"Out2", {bad_out}}};
  // A fully healthy op must render without any fatal failure.
  ASSERT_NO_FATAL_FAILURE(LayerDebugString("test_op", ins, outs));
  // A SelectedRows output shows up as an unresolved type.
  std::string res = LayerDebugString("test_op", ins, outs_error);
  ASSERT_TRUE(res.find("UNRESOLVED_TYPE") != std::string::npos);
  // Broken inputs report both an uninitialized var and a null entry.
  std::string res2 = LayerDebugString("test_op", ins_error, outs_error);
  VLOG(3) << res2;
  ASSERT_TRUE(res2.find("NOT_INITED") != std::string::npos);
  ASSERT_TRUE(res2.find("NULL") != std::string::npos);
}
TEST(test_layer, test_clear_backward_info) {
  // Builds a "mul" OpBase whose backward bookkeeping (ins/outs maps and
  // pending grad ops) is populated, then verifies ClearBackwardTrace()
  // empties all three containers.
  std::shared_ptr<imperative::VarBase> vin(
      new imperative::VarBase(false, "vin"));
  std::shared_ptr<imperative::VarBase> vout(
      new imperative::VarBase(false, "vout"));
  platform::CPUPlace place;
  var_pair x_pair = var_pair("X", vb_vector(1, vin));
  var_pair y_pair = var_pair("Y", vb_vector(1, vin));
  var_pair out_pair = var_pair("Out", vb_vector(1, vout));
  imperative::NameVarBaseMap ins = {x_pair, y_pair};
  imperative::NameVarBaseMap outs = {out_pair};
  // Fix: renamed from `concat_att_map` — the op under test is "mul", the
  // old name was a copy-paste leftover from a concat test. Also dropped the
  // unused `framework::OpDesc desc;` local. Attribute content is unchanged.
  framework::AttributeMap mul_attr_map;
  mul_attr_map["axis"] = 1;
  std::shared_ptr<imperative::OpBase> op(
      OpBase::Create(0, "mul", ins, outs, mul_attr_map, place));
  std::shared_ptr<imperative::OpBase> preceding_op(
      OpBase::Create(0, "mul", ins, outs, mul_attr_map, place));
  op->InsertGradPendingOps(preceding_op.get());
  *(op->GetMutableInsMap()) = ins;
  *(op->GetMutableOutsMap()) = outs;
  ASSERT_GT(op->GetInsMap().size(), 0);
  ASSERT_GT(op->GetOutsMap().size(), 0);
  ASSERT_GT(op->GradPendingOps().size(), 0);
  op->ClearBackwardTrace();
  ASSERT_EQ(op->GetInsMap().size(), 0);
  ASSERT_EQ(op->GetOutsMap().size(), 0);
  ASSERT_EQ(op->GradPendingOps().size(), 0);
}
TEST(test_layer, test_varbase_basic) {
  // Exercises VarBase basics: copy to a place (NewVarBase), grad-var
  // presence/absence, and the StopGradient / Persistable / Name setters.
  platform::CPUPlace place;
  std::shared_ptr<imperative::VarBase> vin(
      new imperative::VarBase(false, "vin"));
  vin->MutableVar()->GetMutable<framework::LoDTensor>()->mutable_data<float>(
      place);
  std::shared_ptr<imperative::VarBase> vout(vin->NewVarBase(place, false));
  // NOTE(review): this relies on the global temp-name counter starting at 0,
  // so the test is order-sensitive — confirm if tests ever run shuffled.
  ASSERT_EQ(vout->Name(), "Itmp0");
  std::shared_ptr<imperative::VarBase> vin_with_grad(
      new imperative::VarBase(true, "vin"));
  // A VarBase created without a grad var must refuse MutableGradVar().
  ASSERT_ANY_THROW(vin->MutableGradVar());
  ASSERT_NO_THROW(ASSERT_TRUE(dynamic_cast<framework::Variable*>(
                                  vin_with_grad->MutableGradVar()) != 0));
  ASSERT_TRUE(
      dynamic_cast<framework::Variable*>(vin_with_grad->MutableGradVar()) != 0);
  vin_with_grad->SetStopGradient(true);
  ASSERT_TRUE(vin_with_grad->StopGradient());
  ASSERT_NO_FATAL_FAILURE(vin_with_grad->SetPersistable(true));
  // Fix: the original re-asserted StopGradient() here (already verified two
  // lines above), leaving SetPersistable(true) unchecked — a copy-paste bug.
  // Assert the persistable flag actually took effect instead.
  ASSERT_TRUE(vin_with_grad->Persistable());
  ASSERT_NO_FATAL_FAILURE(vin_with_grad->SetName("new_name"));
  ASSERT_EQ(vin_with_grad->Name(), "new_name");
}
// TODO(jiabin): Add more ut here for layer
} // namespace imperative
} // namespace paddle
USE_OP(mul);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Created by Jiabin on 2019-08-19.
//
#include <paddle/fluid/framework/op_registry.h>
#include <memory>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/imperative/prepared_operator.h"
#include "paddle/fluid/imperative/type_defs.h"
namespace imperative = paddle::imperative;
namespace platform = paddle::platform;
namespace framework = paddle::framework;
namespace paddle {
namespace imperative {
static framework::RuntimeContext PrepareRuntimeContext(
    const NameVarBaseMap& ins, const NameVarBaseMap& outs) {
  // Flattens each name -> VarBase list into the name -> Variable* maps
  // that framework::RuntimeContext expects.
  framework::VariableValueMap inputs, outputs;
  auto fill = [](const NameVarBaseMap& src, framework::VariableValueMap* dst) {
    for (const auto& pair : src) {
      auto& slot = (*dst)[pair.first];
      slot.reserve(pair.second.size());
      for (const auto& var_base : pair.second) {
        slot.emplace_back(var_base->MutableVar());
      }
    }
  };
  fill(ins, &inputs);
  fill(outs, &outputs);
  return framework::RuntimeContext(std::move(inputs), std::move(outputs));
}
static framework::VariableNameMap CreateVarNameMap(
    const framework::OpInfo& op_info, const std::string& op_type,
    const NameVarBaseMap& varbase_map, bool is_input) {
  // Maps each proto-declared input/output slot of the op to the names of
  // the VarBases bound to it; dispensable slots may be absent and map to {}.
  if (op_info.proto_ == nullptr) {
    return {};
  }
  framework::VariableNameMap result;
  const auto& slots =
      is_input ? op_info.Proto().inputs() : op_info.Proto().outputs();
  for (auto& slot : slots) {
    auto found = varbase_map.find(slot.name());
    if (found == varbase_map.end()) {
      // Missing slots are only legal when the proto marks them dispensable.
      PADDLE_ENFORCE_EQ(
          slot.dispensable(), true,
          "Var: %s not dispensable and there are no such var in inputs",
          slot.name());
      result[slot.name()] = {};
    } else {
      std::vector<std::string> names;
      names.reserve(found->second.size());
      for (auto& var_base : found->second) {
        names.emplace_back(var_base->Name());
      }
      result[slot.name()] = std::move(names);
    }
  }
  return result;
}
using vb_vector = std::vector<std::shared_ptr<imperative::VarBase>>;
using var_pair = std::pair<std::string, vb_vector>;
TEST(test_prepare_op, test_prepare_op) {
  // PreparedOp::Prepare should succeed for a CPU "split" op whose input
  // tensor has allocated float data.
  std::shared_ptr<imperative::VarBase> vin(
      new imperative::VarBase(false, "vin"));
  std::shared_ptr<imperative::VarBase> vout(
      new imperative::VarBase(false, "vout"));
  platform::CPUPlace place;
  vin->MutableVar()->GetMutable<framework::LoDTensor>()->mutable_data<float>(
      place);
  var_pair x_pair = var_pair("X", vb_vector(1, vin));
  var_pair out_pair = var_pair("Out", vb_vector(1, vout));
  imperative::NameVarBaseMap ins = {x_pair};
  imperative::NameVarBaseMap outs = {out_pair};
  // Fix: dropped the unused local `framework::OpDesc desc;` (dead code —
  // nothing in this test reads it).
  framework::AttributeMap split_attr_map;
  const auto& info = framework::OpInfoMap::Instance().Get("split");
  framework::VariableNameMap var_in_map =
      CreateVarNameMap(info, "split", ins, true);
  framework::VariableNameMap var_out_map =
      CreateVarNameMap(info, "split", outs, false);
  framework::OperatorWithKernel op("split", var_in_map, var_out_map,
                                   split_attr_map);
  framework::RuntimeContext ctx = PrepareRuntimeContext(ins, outs);
  ASSERT_NO_FATAL_FAILURE(PreparedOp preparedOp =
                              PreparedOp::Prepare(ctx, op, place));
}
const framework::Tensor* GetTensorFromVar(const framework::Variable& var);
TEST(test_prepare_op, test_get_tensor_from_var) {
  // A variable holding SelectedRows should still yield a non-null tensor
  // view through GetTensorFromVar.
  auto var_base = std::make_shared<imperative::VarBase>(false, "vout_error");
  var_base->MutableVar()->GetMutable<framework::SelectedRows>();
  const framework::Tensor* tensor = GetTensorFromVar(*var_base->MutableVar());
  ASSERT_TRUE(tensor != nullptr);
}
} // namespace imperative
} // namespace paddle
USE_OP(split);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Created by Jiabin on 2019-08-16.
//
#include <paddle/fluid/framework/op_registry.h>
#include <memory>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/imperative/tracer.h"
namespace imperative = paddle::imperative;
namespace platform = paddle::platform;
namespace framework = paddle::framework;
namespace paddle {
namespace imperative {
using vb_vector = std::vector<std::shared_ptr<imperative::VarBase>>;
using var_pair = std::pair<std::string, vb_vector>;
TEST(test_tracer, test_trace_op) {
  // Traces a forward "mul" of a 2x5 tensor by a 5x2 tensor, both filled
  // with 2.0f, then checks every element of the result equals 2*2*5 = 20.
  imperative::Tracer tracer;
  std::shared_ptr<imperative::VarBase> x_in(
      new imperative::VarBase(true, "x_in"));
  std::shared_ptr<imperative::VarBase> y_in(
      new imperative::VarBase(true, "y_in"));
  std::shared_ptr<imperative::VarBase> vout(
      new imperative::VarBase(true, "vout"));
  platform::CPUPlace place;
  std::vector<float> src_data(10, 2.0);
  std::vector<int64_t> dims1 = {2, 5};
  std::vector<int64_t> dims2 = {5, 2};
  auto* x_in_tensor = x_in->MutableVar()->GetMutable<framework::LoDTensor>();
  auto* y_in_tensor = y_in->MutableVar()->GetMutable<framework::LoDTensor>();
  x_in_tensor->Resize(framework::make_ddim(dims1));
  auto* mutable_x = x_in_tensor->mutable_data<float>(place);
  paddle::memory::Copy(place, mutable_x, place, src_data.data(),
                       sizeof(float) * src_data.size());
  y_in_tensor->Resize(framework::make_ddim(dims2));
  auto* mutable_y = y_in_tensor->mutable_data<float>(place);
  paddle::memory::Copy(place, mutable_y, place, src_data.data(),
                       sizeof(float) * src_data.size());
  var_pair x_pair = var_pair("X", vb_vector(1, x_in));
  var_pair y_pair = var_pair("Y", vb_vector(1, y_in));
  var_pair out_pair = var_pair("Out", vb_vector(1, vout));
  imperative::NameVarBaseMap ins = {x_pair, y_pair};
  imperative::NameVarBaseMap outs = {out_pair};
  framework::AttributeMap mul_attr_map;
  mul_attr_map["use_mkldnn"] = false;
  tracer.TraceOp("mul", ins, outs, mul_attr_map, place, true);
  const auto& out_tensor = vout->Var().Get<framework::LoDTensor>();
  // Fix: the original loop condition re-fetched the tensor through
  // vout->Var().Get<LoDTensor>() on every iteration and compared a size_t
  // counter against the signed int64_t returned by numel(). Hoist numel()
  // once and use a matching signed index type.
  const int64_t out_numel = out_tensor.numel();
  for (int64_t i = 0; i < out_numel; ++i) {
    ASSERT_EQ(out_tensor.data<float>()[i], 20.0);
  }
}
TEST(test_tracer, test_track_backward_output) {
  // The forward input "y_in" is built without a grad var, so tracing "mul"
  // with trace_backward = true must throw when the backward graph is wired.
  imperative::Tracer tracer;
  auto x_in = std::make_shared<imperative::VarBase>(true, "x_in");
  auto y_in = std::make_shared<imperative::VarBase>(false, "y_in");
  auto vout = std::make_shared<imperative::VarBase>(true, "vout");
  platform::CPUPlace place;
  std::vector<float> src_data(10, 2.0);
  // Resize a var's LoDTensor and fill it with the shared source buffer.
  auto init_tensor = [&](const std::shared_ptr<imperative::VarBase>& vb,
                         const std::vector<int64_t>& dims) {
    auto* tensor = vb->MutableVar()->GetMutable<framework::LoDTensor>();
    tensor->Resize(framework::make_ddim(dims));
    auto* data = tensor->mutable_data<float>(place);
    paddle::memory::Copy(place, data, place, src_data.data(),
                         sizeof(float) * src_data.size());
  };
  init_tensor(x_in, {2, 5});
  init_tensor(y_in, {5, 2});
  imperative::NameVarBaseMap ins = {{"X", {x_in}}, {"Y", {y_in}}};
  imperative::NameVarBaseMap outs = {{"Out", {vout}}};
  framework::AttributeMap mul_attr_map;
  mul_attr_map["use_mkldnn"] = false;
  ASSERT_ANY_THROW(tracer.TraceOp("mul", ins, outs, mul_attr_map, place, true));
}
TEST(test_tracer, test_track_backward_input) {
  // The forward output "vout" is built without a grad var, so tracing "mul"
  // with trace_backward = true must throw when the backward graph is wired.
  imperative::Tracer tracer;
  auto x_in = std::make_shared<imperative::VarBase>(true, "x_in");
  auto y_in = std::make_shared<imperative::VarBase>(true, "y_in");
  auto vout = std::make_shared<imperative::VarBase>(false, "vout");
  platform::CPUPlace place;
  std::vector<float> src_data(10, 2.0);
  // Resize a var's LoDTensor and fill it with the shared source buffer.
  auto init_tensor = [&](const std::shared_ptr<imperative::VarBase>& vb,
                         const std::vector<int64_t>& dims) {
    auto* tensor = vb->MutableVar()->GetMutable<framework::LoDTensor>();
    tensor->Resize(framework::make_ddim(dims));
    auto* data = tensor->mutable_data<float>(place);
    paddle::memory::Copy(place, data, place, src_data.data(),
                         sizeof(float) * src_data.size());
  };
  init_tensor(x_in, {2, 5});
  init_tensor(y_in, {5, 2});
  imperative::NameVarBaseMap ins = {{"X", {x_in}}, {"Y", {y_in}}};
  imperative::NameVarBaseMap outs = {{"Out", {vout}}};
  framework::AttributeMap mul_attr_map;
  mul_attr_map["use_mkldnn"] = false;
  ASSERT_ANY_THROW(tracer.TraceOp("mul", ins, outs, mul_attr_map, place, true));
}
} // namespace imperative
} // namespace paddle
USE_OP(mul);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -11,282 +11,207 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/imperative/tracer.h"
#include <memory>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle {
namespace imperative {
void CreateGradOp(const framework::OpDesc& op_desc,
const std::unordered_set<std::string>& no_grad_set,
const std::vector<framework::BlockDesc*>& grad_sub_block,
std::vector<framework::OpDesc*>* grad_op_descs,
std::unordered_map<std::string, std::string>* grad_to_var) {
PADDLE_ENFORCE(grad_op_descs->empty());
const framework::OpInfo& op_info =
framework::OpInfoMap::Instance().Get(op_desc.Type());
if (!op_info.grad_op_maker_) return;
std::vector<std::unique_ptr<framework::OpDesc>> descs =
op_info.GradOpMaker()(op_desc, no_grad_set, grad_to_var, grad_sub_block);
for (auto& desc : descs) {
grad_op_descs->emplace_back(desc.release());
static std::vector<std::unique_ptr<framework::OpDesc>> CreateGradOpDescs(
const framework::OpInfo& op_info, const framework::OpDesc& op_desc,
const std::unordered_set<std::string>& no_grad_set,
const std::vector<framework::BlockDesc*>& grad_sub_block,
std::unordered_map<std::string, std::string>* grad_to_var) {
if (op_info.grad_op_maker_) {
return op_info.grad_op_maker_(op_desc, no_grad_set, grad_to_var,
grad_sub_block);
} else {
return {};
}
}
void CreateNoBuffuerGrad(std::shared_ptr<imperative::VarBase> var,
platform::DeviceContext* dev_ctx) {
PADDLE_ENFORCE_NOT_NULL(var, "Could not get valid var base");
PADDLE_ENFORCE_NOT_NULL(dev_ctx,
"Could not get valid device from forward op");
if (var->grads_ == nullptr) {
auto& var_t = var->var_->Get<framework::LoDTensor>();
var->grads_ = std::shared_ptr<imperative::VarBase>(
new VarBase(var->GradName(), framework::proto::VarType::FP32,
framework::vectorize(var_t.dims()), dev_ctx->GetPlace(),
var->IsStopGradient(), false, false));
void Tracer::TraceOp(const std::string& type, const NameVarBaseMap& ins,
const NameVarBaseMap& outs, framework::AttributeMap attrs,
const platform::Place& place, bool trace_backward) {
platform::RecordEvent event(type);
VLOG(1) << "Trace Op: " << type;
size_t op_id = GenerateUniqueId();
auto op = OpBase::Create(op_id, type, ins, outs, std::move(attrs), place);
op->Run(ins, outs);
if (ComputeRequiredGrad(ins, outs, trace_backward)) {
TraceBackward(op, framework::OpDesc(op->Type(), op->InputNameMap(),
op->OutputNameMap(), op->Attrs()),
ins, outs);
}
}
platform::Place GetExpectedPlace(platform::Place place, VarBasePtrMap inputs) {
platform::Place result = place;
for (const auto& it : inputs) {
for (const std::shared_ptr<imperative::VarBase>& var : it.second) {
platform::Place tmp_place =
var->var_->Get<framework::LoDTensor>().place();
if (!platform::is_same_place(tmp_place, result)) {
PADDLE_THROW(
"Input variable should keep in the same place: %s, but get place: "
"%s of input %s instead",
result, tmp_place, it.first);
}
}
}
return result;
bool Tracer::ComputeRequiredGrad(const NameVarBaseMap& ins,
const NameVarBaseMap outs,
bool trace_backward) {
// TODO(jiabin): Implement auto prune here
return trace_backward;
}
framework::VariableNameMap CreateInputVarNameMap(
const OpBase* op, const VarBasePtrMap& varbase_map) {
framework::VariableNameMap result;
void Tracer::TraceBackward(const std::shared_ptr<OpBase>& fwd_op,
const framework::OpDesc& fwd_op_desc,
const NameVarBaseMap& ins,
const NameVarBaseMap& outs) {
// grad_to_var is a map of framework::GradVarName(in_var_name/out_var_name) ->
// in_var_name/out_var_name
std::unordered_map<std::string, std::string> grad_to_var;
auto& info_map = framework::OpInfoMap::Instance();
auto* op_info = info_map.GetNullable(op->Type());
if (op_info == nullptr || op_info->proto_ == nullptr) {
return result;
}
// Get grad_op_desc using fwd_op_desc
std::vector<std::unique_ptr<framework::OpDesc>> grad_op_descs_ =
CreateGradOpDescs(fwd_op->Info(), fwd_op_desc, {}, {}, &grad_to_var);
for (auto& in : op_info->Proto().inputs()) {
auto it = varbase_map.find(in.name());
if (it == varbase_map.end()) {
PADDLE_ENFORCE(in.dispensable());
result[in.name()] = {};
} else {
auto var_vector = it->second;
std::vector<std::string> args;
args.reserve(var_vector.size());
for (std::shared_ptr<imperative::VarBase> var_base : var_vector) {
args.emplace_back(var_base->Name());
}
result[in.name()] = args;
}
}
return result;
}
// Create grad_ops using grad_op_descs
framework::VariableNameMap CreateOutputVarNameMap(
const OpBase* op, const VarBasePtrMap& varbase_map) {
framework::VariableNameMap result;
size_t grad_op_num = grad_op_descs_.size();
auto& info_map = framework::OpInfoMap::Instance();
auto* op_info = info_map.GetNullable(op->Type());
if (op_info == nullptr || op_info->proto_ == nullptr) {
return result;
}
VLOG(3) << "Create " << grad_op_num << " grad op desc(s) to op "
<< fwd_op->Type();
for (auto& out : op_info->Proto().outputs()) {
auto it = varbase_map.find(out.name());
if (it == varbase_map.end()) {
PADDLE_ENFORCE(out.dispensable());
result[out.name()] = {};
} else {
auto var_vector = it->second;
std::vector<std::string> args;
args.reserve(var_vector.size());
for (const std::shared_ptr<imperative::VarBase>& var_base : var_vector) {
args.emplace_back(var_base->Name());
}
result[out.name()] = args;
}
if (grad_op_num == 0) {
return;
}
return result;
}
Tracer::Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {}
void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
VarBasePtrMap* outputs, framework::AttributeMap attrs_map,
const platform::Place expected_place,
const bool stop_gradient) {
platform::RecordEvent record_event(op->type_);
framework::VariableValueMap invars_map;
framework::VariableValueMap outvars_map;
// Construct input_vars_map and output_vars_map
std::map<std::string, std::shared_ptr<imperative::VarBase>> current_vars_map;
for (auto it : inputs) {
auto& invars = invars_map[it.first];
invars.reserve(it.second.size());
for (std::shared_ptr<imperative::VarBase> inp : it.second) {
PADDLE_ENFORCE_NOT_NULL(inp->var_, "op %s input %s nullptr", op->Type(),
inp->Name());
invars.emplace_back(inp->var_.get());
if (!stop_gradient) {
current_vars_map[inp->Name()] = inp;
}
VLOG(3) << "input var name: " << inp->Name()
<< " inited: " << inp->var_->IsInitialized()
<< " stop_grad: " << inp->IsStopGradient();
// Build a map to record var_name -> std::shared_ptr<VarBase>*,
// so that we can find suitable var in grad op descs
std::unordered_map<std::string, const std::shared_ptr<VarBase>*> name_to_var;
for (auto& pair : ins) {
for (auto& var : pair.second) {
auto& var_ptr = name_to_var[var->Name()];
PADDLE_ENFORCE_EQ(var_ptr == nullptr || var_ptr->get() == var.get(), true,
"There are different variables with same name %s",
var->Name());
var_ptr = &var;
}
op->TrackPreOp(it.first, it.second);
}
for (const auto& it : *outputs) {
auto& outvars = outvars_map[it.first];
const std::vector<std::shared_ptr<imperative::VarBase>>& outputs_tmp =
it.second;
outvars.reserve(outputs_tmp.size());
for (size_t i = 0U; i < outputs_tmp.size(); ++i) {
// Add weak_ptr to track outputs
op->outputs_ref.emplace_back(outputs_tmp[i]);
std::shared_ptr<imperative::VarBase> out = outputs_tmp[i];
outvars.emplace_back(out->var_.get());
out->TrackPreOp(op, it.first, i, stop_gradient);
if (!stop_gradient) {
current_vars_map[out->Name()] = out;
}
VLOG(3) << "output var name: " << out->Name()
<< " inited: " << out->var_->IsInitialized()
<< " stop_grad: " << out->IsStopGradient();
for (auto& pair : outs) {
for (auto& var : pair.second) {
auto& var_ptr = name_to_var[var->Name()];
PADDLE_ENFORCE_EQ(var_ptr == nullptr || var_ptr->get() == var.get(), true,
"There are different variables with same name %s",
var->Name());
var_ptr = &var;
}
}
// Check attrs and create op
framework::VariableNameMap invars_name_map =
CreateInputVarNameMap(op, inputs);
framework::VariableNameMap outvars_name_map =
CreateOutputVarNameMap(op, *outputs);
auto& info = framework::OpInfoMap::Instance().Get(op->Type());
if (info.Checker() != nullptr) {
info.Checker()->Check(&attrs_map);
}
std::unique_ptr<framework::OperatorBase> op_base =
framework::OpRegistry::CreateOp(op->Type(), invars_name_map,
outvars_name_map, attrs_map);
if (info.infer_var_type_) {
RuntimeInferVarTypeContext infer_var_type_ctx(&inputs, outputs, &attrs_map);
info.infer_var_type_(&infer_var_type_ctx);
}
// TODO(minqiyang): Support infer var type in imperative mode
// Run forward op
VLOG(3) << "tracer running " << op->Type();
framework::RuntimeContext ctx(invars_map, outvars_map);
// TODO(panyx0718): Cache p.
framework::OperatorWithKernel* op_kernel =
dynamic_cast<framework::OperatorWithKernel*>(op_base.get());
PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
framework::Scope scope;
op->place_ = GetExpectedPlace(expected_place, inputs);
PreparedOp prepared_op = PreparedOp::Prepare(ctx, *op_kernel, op->place_);
prepared_op.op.RuntimeInferShape(scope, op->place_, ctx);
prepared_op.func(
framework::ExecutionContext(prepared_op.op, scope, *prepared_op.dev_ctx,
prepared_op.ctx, prepared_op.kernel_configs));
if (!stop_gradient) {
VLOG(5) << "start construct backward op";
// construct grad op descs
op->attrs_ = attrs_map;
std::unique_ptr<framework::OpDesc> fwd_op_desc(new framework::OpDesc(
op->Type(), invars_name_map, outvars_name_map, attrs_map));
std::unique_ptr<std::unordered_map<std::string, std::string>> grad_to_var(
new std::unordered_map<std::string, std::string>());
// NOTE(minqiyang): We don't support control flow op in imperative now
// Add grad_block_ when we want to support it
CreateGradOp(*fwd_op_desc, {}, {}, &op->grad_op_descs_, grad_to_var.get());
VLOG(5) << "create grad op desc: " << op->grad_op_descs_[0]->Type();
const size_t grad_op_count = op->grad_op_descs_.size();
// Build backward ins and outs
for (size_t i = 0; i < grad_op_num; i++) {
// Step1: build grad op and add them to engine
// Use trace id to decide the order of gradient sum in sorted sum mode
size_t trace_id = fwd_op->id();
std::shared_ptr<OpBase> grad_op =
OpBase::Create(trace_id, (*(grad_op_descs_[i].get())), fwd_op->place());
// this OpBase* is just used to manage op's life time
engine_->InsertOp(grad_op.get(), grad_op);
std::unordered_set<OpBase*> visited_preceding_ops;
// Step2 : prepare grad_in vars and bind them with grad_op,
// set inputs' grad_op as current grad_op
for (const auto& grad_ins : grad_op_descs_[i]->Inputs()) {
if (grad_ins.second.empty()) continue;
auto& bwd_in = (*grad_op->GetMutableInsMap())[grad_ins.first];
bwd_in.reserve(grad_ins.second.size());
for (auto& grad_in_var_name : grad_ins.second) {
auto iter = grad_to_var.find(grad_in_var_name);
if (iter != grad_to_var.end()) {
// If it is a grad var, find its coresponding forward var
auto& fwd_var_name = iter->second;
auto fwd_var_iter = name_to_var.find(fwd_var_name);
PADDLE_ENFORCE_EQ(fwd_var_iter != name_to_var.end(), true,
"Cannot find forward variable named %s",
fwd_var_name);
PADDLE_ENFORCE_NOT_NULL(
(*(fwd_var_iter->second))->GradVarBase(),
"Grad of %s should "
"not be NULL when we Track_Backward Input of %s",
(*(fwd_var_iter->second))->Name(), grad_op->Type());
(*(fwd_var_iter->second))->GradVarBase()->AddGradOps(grad_op);
VLOG(3) << "Add Grad Op " << grad_op->Type() << " for :"
<< (*(fwd_var_iter->second))->GradVarBase()->Name();
bwd_in.emplace_back((*(fwd_var_iter->second))->GradVarBase());
} else {
// If it is a forward var, just add it
auto fwd_var_iter = name_to_var.find(grad_in_var_name);
PADDLE_ENFORCE_EQ(fwd_var_iter != name_to_var.end(), true,
"Cannot find forward variable named %s",
grad_in_var_name);
bwd_in.emplace_back(*(fwd_var_iter->second));
}
op->grad_input_vars_.resize(grad_op_count);
op->grad_output_vars_.resize(grad_op_count);
VLOG(3) << "Set backward input " << grad_ins.first << " of "
<< grad_op->Type() << " to be "
<< (bwd_in.back() ? bwd_in.back()->Name() : "nullptr");
}
}
for (size_t i = 0; i < grad_op_count; ++i) {
framework::OpDesc* grad_op_desc = op->grad_op_descs_[i];
for (auto it : grad_op_desc->Inputs()) {
auto& grad_in_vars = op->grad_input_vars_[i][it.first];
grad_in_vars.reserve(it.second.size());
for (const std::string& grad_invar : it.second) {
auto var_it = grad_to_var->find(grad_invar);
if (var_it == grad_to_var->end()) {
auto fwd_var_it = current_vars_map.find(grad_invar);
PADDLE_ENFORCE(fwd_var_it != current_vars_map.end());
// Forward inputs or outputs.
grad_in_vars.emplace_back(fwd_var_it->second);
} else {
std::shared_ptr<imperative::VarBase> var =
current_vars_map[var_it->second];
CreateNoBuffuerGrad(var, prepared_op.GetDeviceContext());
// Douts.
var->grads_->SetPreOp(var->PreOp());
grad_in_vars.emplace_back(var->grads_);
// Step3: prepare grad_out vars and using their grad_ops to set current
// grad_op's preceding op
for (auto& grad_outs : grad_op_descs_[i]->Outputs()) {
if (grad_outs.second.empty()) continue;
auto& bwd_out = (*grad_op->GetMutableOutsMap())[grad_outs.first];
bwd_out.reserve(grad_outs.second.size());
for (auto& grad_out_var_name : grad_outs.second) {
auto iter = grad_to_var.find(grad_out_var_name);
PADDLE_ENFORCE_EQ(iter != grad_to_var.end(), true,
"Cannot find output of input grad %s in op %s",
grad_out_var_name, fwd_op->Type());
auto fwd_var_iter = name_to_var.find(iter->second);
PADDLE_ENFORCE_EQ(fwd_var_iter != name_to_var.end(), true,
"Cannot find forward variable named %s",
iter->second);
PADDLE_ENFORCE_NOT_NULL(
(*(fwd_var_iter->second))->GradVarBase(),
"Grad of %s should "
"not be NULL when we Track_Backward Output of %s",
(*(fwd_var_iter->second))->Name(), grad_op->Type());
bwd_out.emplace_back((*(fwd_var_iter->second))->GradVarBase());
VLOG(3) << "Set backward output " << grad_outs.first << " of "
<< grad_op->Type() << " to be "
<< (bwd_out.back() ? bwd_out.back()->Name() : "nullptr");
auto preceding_ops =
(*(fwd_var_iter->second))->GradVarBase()->GradOps();
if (VLOG_IS_ON(3) && !preceding_ops.empty()) {
VLOG(3) << "Add preceding Op of :"
<< (*(fwd_var_iter->second))->GradVarBase()->Name()
<< " It's preceding Op are: ";
for (const auto& op : preceding_ops) {
VLOG(3) << op->Type();
}
}
}
for (auto it : grad_op_desc->Outputs()) {
auto& grad_out_vars = op->grad_output_vars_[i][it.first];
for (const std::string& grad_outvar : it.second) {
auto var_it = grad_to_var->find(grad_outvar);
PADDLE_ENFORCE(var_it != grad_to_var->end(),
"Could not found the grad op output var, should this "
"operator %s's stop gradient be True",
op->Type());
std::shared_ptr<imperative::VarBase> var =
current_vars_map[var_it->second];
CreateNoBuffuerGrad(var, prepared_op.GetDeviceContext());
var->grads_->SetPreOp(var->PreOp());
grad_out_vars.push_back(var->grads_);
VLOG(3) << "grads output var name: " << var->name_;
if (!preceding_ops.empty()) {
for (const auto& op : preceding_ops) {
PADDLE_ENFORCE_NOT_NULL(op, "No nullptr should be preceding_op");
if (visited_preceding_ops.count(op) == 0) {
visited_preceding_ops.insert(op);
grad_op->InsertGradPendingOps(op);
}
}
} else {
VLOG(5) << "Hit leaf VarBase";
VLOG(5) << "Hit leaf VarBase"
<< (*(fwd_var_iter->second))->GradVarBase()->Name();
}
}
}
// To ensure numeric stability as static graph
grad_op->SortGradPendingOps();
}
}
} // namespace imperative
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -14,46 +14,48 @@
#pragma once
#include <map>
#include <set>
#include <atomic>
#include <future> // NOLINT
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/op_registry.h"
#include "ThreadPool.h"
#include "paddle/fluid/imperative/engine.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/macros.h"
namespace paddle {
namespace imperative {
void CreateGradOp(const framework::OpDesc& op_desc,
const std::unordered_set<std::string>& no_grad_set,
const std::vector<framework::BlockDesc*>& grad_sub_block,
framework::OpDesc** grad_op_desc,
std::unordered_map<std::string, std::string>* grad_to_var);
platform::Place GetExpectedPlace(platform::Place place, VarBasePtrMap inputs);
class Tracer {
DISABLE_COPY_AND_ASSIGN(Tracer);
public:
explicit Tracer(framework::BlockDesc* root_block);
Tracer() : engine_(new BasicEngine()) {}
virtual ~Tracer() {}
~Tracer() = default;
void Trace(OpBase* op, const VarBasePtrMap& inputs,
VarBasePtrMap* outputs, // NOLINT
framework::AttributeMap attrs_map,
const platform::Place expected_place,
const bool stop_gradient = false);
void TraceOp(const std::string& type, const NameVarBaseMap& ins,
const NameVarBaseMap& outs, framework::AttributeMap attrs,
const platform::Place& place, bool trace_bacward);
bool ComputeRequiredGrad(const NameVarBaseMap& ins, const NameVarBaseMap outs,
bool trace_backward);
void TraceBackward(const std::shared_ptr<OpBase>& fwd_op,
const framework::OpDesc& fwd_op_desc,
const NameVarBaseMap& ins, const NameVarBaseMap& outs);
Engine* GetDefaultEngine() const { return engine_.get(); }
private:
platform::Place GetPlace(const VarBasePtrMap& inputs);
static size_t GenerateUniqueId() {
static std::atomic<size_t> id{0};
return id.fetch_add(1);
}
framework::BlockDesc* root_block_;
private:
std::unique_ptr<Engine> engine_;
};
} // namespace imperative
......
......@@ -17,8 +17,6 @@ limitations under the License. */
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
namespace paddle {
......@@ -26,18 +24,10 @@ namespace imperative {
class VarBase;
class OpBase;
class Tracer;
typedef std::map<std::string, std::vector<std::shared_ptr<VarBase>>>
VarBasePtrMap;
typedef std::vector<std::weak_ptr<VarBase>> VarBaseWeakPtrList;
typedef std::map<std::string, std::vector<OpBase*>> OpBasePtrMap;
typedef std::unordered_map<
const VarBase*,
std::pair<platform::Place,
std::vector<std::pair<int, std::shared_ptr<VarBase>>>>>
BackwardSumMap; // var_grad -> {place, {id -> var_grad@rename}}
typedef std::unordered_map<const VarBase*, std::pair<int, bool>> GradientRef;
// var_grad -> {ref_times, is_first_to_be_accumulate}
using NameVarBaseMap =
std::map<std::string, std::vector<std::shared_ptr<VarBase>>>;
} // namespace imperative
} // namespace paddle
set(PYBIND_DEPS pybind python proto_desc memory executor fleet_wrapper box_wrapper nccl_wrapper prune
feed_fetch_method pass_builder parallel_executor profiler layer scope_pool
tracer analysis_predictor imperative_profiler nccl_context)
feed_fetch_method pass_builder parallel_executor profiler layer tracer engine scope_pool
analysis_predictor imperative_profiler nccl_context imperative_flag)
if(WITH_PYTHON)
list(APPEND PYBIND_DEPS py_func_op)
......
......@@ -20,11 +20,13 @@ limitations under the License. */
#include <pybind11/functional.h>
#include <pybind11/stl.h>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include "paddle/fluid/framework/block_desc.h"
#include <vector>
#include "paddle/fluid/imperative/backward_strategy.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/nccl_context.h"
#include "paddle/fluid/imperative/profiler.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/fluid/imperative/type_defs.h"
......@@ -44,16 +46,27 @@ class Layer : public imperative::Layer {
const std::vector<std::shared_ptr<imperative::VarBase>> &inputs)
override {
PYBIND11_OVERLOAD(std::vector<std::shared_ptr<imperative::VarBase>>, Layer,
Forward,
inputs); // NOLINT
Forward, inputs); // NOLINT
}
};
class PYBIND11_HIDDEN PyOpBase : public imperative::OpBase {
// Wrapper for a Python callable so the imperative module does not depend on
// Python directly.
// TODO(jiabin): Add OpBase's pybind interface back to enable backward hook
class PYBIND11_HIDDEN PyCallableObject {
public:
using imperative::OpBase::OpBase; // Inherit constructors
PyCallableObject(std::shared_ptr<py::object> py_obj_ptr)
: py_obj_ptr_(std::move(py_obj_ptr)) {}
~PyCallableObject() {
py::call_guard<py::gil_scoped_acquire>();
py_obj_ptr_.reset();
}
void operator()() {
py::call_guard<py::gil_scoped_acquire>();
py_obj_ptr_->operator()(this);
}
PyOpBase(const std::string &name) : OpBase(name) {}
private:
std::shared_ptr<py::object> py_obj_ptr_;
};
// Function like obj.attr_name in Python.
......@@ -125,33 +138,43 @@ GetVarBaseListFromPyHandle(const py::handle &handle) {
}
} else {
PADDLE_THROW(
"unsupported type %s, must be Variable, List[Variable] or "
"unsupported type %s, must be Variable, list[Variable] or "
"tuple[Variable]",
py::str(handle));
}
PADDLE_ENFORCE(PyErr_Occurred() == nullptr,
py::str(py::handle(PyErr_Occurred())));
return result;
}
using PyVarBaseMap = std::unordered_map<std::string, py::handle>;
using PyNameVarBaseMap = std::unordered_map<std::string, py::handle>;
static imperative::VarBasePtrMap ConvertToVarBasePtrMap(
const PyVarBaseMap &map) {
imperative::VarBasePtrMap result;
// Convert a {name -> py::handle} map coming from Python into the
// imperative::NameVarBaseMap consumed by the tracer. Each handle may be a
// single Variable, a list[Variable] or a tuple[Variable] (see
// GetVarBaseListFromPyHandle); names whose handle yields no variables are
// dropped from the result.
static imperative::NameVarBaseMap ConvertToNameVarBaseMap(
    const PyNameVarBaseMap &map) {
  imperative::NameVarBaseMap result;
  for (auto &pair : map) {
    auto var_vec = GetVarBaseListFromPyHandle(pair.second);
    if (!var_vec.empty()) {
      result.emplace(pair.first, std::move(var_vec));
    }
  }
  // If any Python error was raised while extracting the handles, surface it
  // here as a C++ enforce failure instead of leaving it pending.
  PADDLE_ENFORCE_EQ(PyErr_Occurred() == nullptr, true,
                    py::str(py::handle(PyErr_Occurred())));
  return result;
}
// Human-readable name of the variable type held by a VarBase, used for
// debug/log messages. RAW variables and uninitialized variables are reported
// with fixed strings; everything else defers to framework::ToTypeName.
static std::string GetTypeName(const imperative::VarBase &var) {
  if (var.Type() == framework::proto::VarType::RAW) {
    return "RAW";
  }
  if (!var.Var().IsInitialized()) {
    return "nullptr";
  }
  return framework::ToTypeName(var.Var().Type());
}
// Bind Methods
void BindImperative(pybind11::module *m_ptr) {
void BindImperative(py::module *m_ptr) {
auto &m = *m_ptr;
py::class_<imperative::detail::BackwardStrategy> backward_strategy(
......@@ -200,69 +223,88 @@ void BindImperative(pybind11::module *m_ptr) {
m.def("_dygraph_debug_level", []() { return imperative::GetDebugLevel(); });
py::class_<imperative::VarBase, std::shared_ptr<imperative::VarBase>>(
m, "VarBase", R"DOC()DOC")
m, "VarBase",
R"DOC()DOC")
.def_static("_alive_vars", &imperative::VarBase::AliveVarNames)
.def(
py::init<const std::string &, paddle::framework::proto::VarType::Type,
const std::vector<int64_t>, const paddle::platform::CPUPlace,
bool, bool>())
.def(
py::init<const std::string &, paddle::framework::proto::VarType::Type,
const std::vector<int64_t>,
const paddle::platform::CUDAPlace, bool, bool>())
.def("__init__",
[](imperative::VarBase &self, const std::string &name,
framework::proto::VarType::Type type,
framework::proto::VarType::Type dtype,
const std::vector<int> &dims, bool stop_gradient,
bool persistable) {
new (&self) imperative::VarBase(name);
self.SetPersistable(persistable);
self.SetType(type);
self.SetDataType(dtype);
self.SetStopGradient(stop_gradient);
if (type == framework::proto::VarType::LOD_TENSOR) {
auto *tensor =
self.MutableVar()->GetMutable<framework::LoDTensor>();
tensor->Resize(framework::make_ddim(dims));
}
})
.def("_run_backward",
[](imperative::VarBase &self,
const imperative::detail::BackwardStrategy &bckst) {
self.RunBackward(bckst);
})
.def("_grad_name", &imperative::VarBase::GradName)
.def("_grad_value", &imperative::VarBase::GradValue)
const imperative::detail::BackwardStrategy &bckst,
const imperative::Tracer &tracer) {
// TODO(jiabin): when we impl more backward execution we can select
// them
imperative::Engine *engine = tracer.GetDefaultEngine();
VLOG(3) << "Start backward";
engine->Init(&self, bckst);
engine->Execute();
VLOG(3) << "Finish backward";
},
py::call_guard<py::gil_scoped_release>())
.def("_grad_name", &imperative::VarBase::GradVarName)
.def("_grad_value",
[](imperative::VarBase &self) {
return self.MutableGradVar()->Get<framework::LoDTensor>();
},
py::return_value_policy::reference)
.def("_clear_gradient", &imperative::VarBase::ClearGradient)
.def("_grad_ivar",
[](const imperative::VarBase &self) { return self.grads_; },
py::return_value_policy::reference)
[](const imperative::VarBase &self) {
auto &grad_var = self.GradVarBase();
if (grad_var && grad_var->Var().IsInitialized()) {
return grad_var;
} else {
return std::shared_ptr<imperative::VarBase>(nullptr);
}
},
py::return_value_policy::copy)
.def("_copy_to",
[](const imperative::VarBase &self, const platform::CPUPlace &place,
bool blocking) {
return self.NewVarBase(place, blocking).release();
},
py::return_value_policy::take_ownership)
bool blocking) { return self.NewVarBase(place, blocking); },
py::return_value_policy::copy)
.def("_copy_to",
[](const imperative::VarBase &self, const platform::CUDAPlace &place,
bool blocking) {
return self.NewVarBase(place, blocking).release();
},
py::return_value_policy::take_ownership)
.def("value",
[](const imperative::VarBase &self) { return self.var_.get(); },
bool blocking) { return self.NewVarBase(place, blocking); },
py::return_value_policy::copy)
.def("value", [](imperative::VarBase &self) { return self.MutableVar(); },
py::return_value_policy::reference)
.def_property("name", &imperative::VarBase::Name,
&imperative::VarBase::SetName)
.def_property_readonly("shape", &imperative::VarBase::Shape)
.def_property_readonly(
"shape",
[](imperative::VarBase &self) {
if (self.Var().IsType<framework::LoDTensor>()) {
return framework::vectorize2int(
self.Var().Get<framework::LoDTensor>().dims());
} else {
VLOG(2) << "It is meaningless to get shape of variable type "
<< GetTypeName(self);
return std::vector<int>();
}
})
.def_property_readonly("type", &imperative::VarBase::Type)
.def_property_readonly("dtype", &imperative::VarBase::DataType)
.def_property("persistable", &imperative::VarBase::IsPersistable,
.def_property("persistable", &imperative::VarBase::Persistable,
&imperative::VarBase::SetPersistable)
.def_property("stop_gradient", &imperative::VarBase::IsStopGradient,
.def_property("stop_gradient", &imperative::VarBase::StopGradient,
&imperative::VarBase::SetStopGradient);
py::class_<imperative::OpBase, PyOpBase>(m, "OpBase", R"DOC()DOC")
.def(py::init<const std::string &>())
.def("register_backward_hooks",
[](imperative::OpBase &self, const py::object &callable) {
self.RegisterBackwardHooks(callable);
})
.def_property("_trace_id",
[](const imperative::OpBase &self) {
py::gil_scoped_release release;
return self.trace_id_;
},
[](imperative::OpBase &self, int trace_id) {
py::gil_scoped_release release;
self.trace_id_ = trace_id;
},
py::return_value_policy::reference)
.def_property_readonly("type", &imperative::OpBase::Type);
py::class_<imperative::Layer, Layer /* <--- trampoline*/> layer(m, "Layer");
layer.def(py::init<>())
.def("forward",
......@@ -271,42 +313,35 @@ void BindImperative(pybind11::module *m_ptr) {
return self.Forward(inputs);
});
// NOTE(zjl): Tracer use PyVarBaseMap as its parameter but not VarBasePtrMap.
// We call Python C-API to convert PyVarBaseMap to VarBasePtrMap, instead
// making conversion in Python code. This speed up Tracer.trace() about 6%
// in ptb model and make time cost in Python to be nearly zero.
py::class_<imperative::Tracer>(m, "Tracer", "")
.def("__init__",
[](imperative::Tracer &self, framework::BlockDesc *root_block) {
new (&self) imperative::Tracer(root_block);
})
[](imperative::Tracer &self) { new (&self) imperative::Tracer(); })
.def("trace",
[](imperative::Tracer &self, imperative::OpBase *op,
const PyVarBaseMap &inputs, const PyVarBaseMap &outputs,
framework::AttributeMap attrs_map,
const platform::CPUPlace expected_place,
const bool stop_gradient = false) {
auto ins = ConvertToVarBasePtrMap(inputs);
auto outs = ConvertToVarBasePtrMap(outputs);
[](imperative::Tracer &self, const std::string &type,
const PyNameVarBaseMap &ins, const PyNameVarBaseMap &outs,
framework::AttributeMap attrs, const platform::CUDAPlace &place,
bool trace_backward) {
auto ins_map = ConvertToNameVarBaseMap(ins);
auto outs_map = ConvertToNameVarBaseMap(outs);
{
py::gil_scoped_release release;
self.Trace(op, std::move(ins), &outs, attrs_map, expected_place,
stop_gradient);
self.TraceOp(type, std::move(ins_map), std::move(outs_map),
std::move(attrs), place, trace_backward);
}
})
.def("trace", [](imperative::Tracer &self, imperative::OpBase *op,
const PyVarBaseMap &inputs, const PyVarBaseMap &outputs,
framework::AttributeMap attrs_map,
const platform::CUDAPlace expected_place,
const bool stop_gradient = false) {
auto ins = ConvertToVarBasePtrMap(inputs);
auto outs = ConvertToVarBasePtrMap(outputs);
{
py::gil_scoped_release release;
self.Trace(op, std::move(ins), &outs, attrs_map, expected_place,
stop_gradient);
}
});
.def("trace",
[](imperative::Tracer &self, const std::string &type,
const PyNameVarBaseMap &ins, const PyNameVarBaseMap &outs,
framework::AttributeMap attrs, const platform::CPUPlace &place,
bool trace_backward) {
auto ins_map = ConvertToNameVarBaseMap(ins);
auto outs_map = ConvertToNameVarBaseMap(outs);
{
py::gil_scoped_release release;
self.TraceOp(type, std::move(ins_map), std::move(outs_map),
std::move(attrs), place, trace_backward);
}
});
// define parallel context
py::class_<imperative::ParallelStrategy> parallel_strategy(
......
......@@ -14,10 +14,6 @@ limitations under the License. */
#pragma once
#include <Python.h>
#include <string>
#include <vector>
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/nccl_context.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
......
......@@ -18,6 +18,7 @@ from paddle.fluid import core
from paddle.fluid import framework
from .tracer import Tracer
import logging
import objgraph
__all__ = [
'no_grad',
......@@ -123,7 +124,7 @@ def guard(place=None):
"""
train = framework.Program()
startup = framework.Program()
tracer = Tracer(train.current_block().desc)
tracer = Tracer()
if place is None:
if core.is_compiled_with_cuda():
......@@ -138,19 +139,22 @@ def guard(place=None):
yield
def _print_debug_msg():
def _print_debug_msg(limit=5, is_test=False):
if not core._is_dygraph_debug_enabled():
logging.warn(
'Debug mode is not enabled. Please set FLAGS_dygraph_debug=1 to enable debug'
)
return
unique_name_size = len(framework.unique_name.generator.ids)
tracer_var_size = len(framework._dygraph_tracer()._vars)
alive_cpp_var_size = len(core.VarBase._alive_vars())
logging.warn(
'unique_name num: {}, tracer vars num: {}, alive cpp vars num: {}'
.format(unique_name_size, tracer_var_size, alive_cpp_var_size))
if not is_test:
logging.warn(
'unique_name num: {}, tracer vars num: {}, alive cpp vars num: {}'
.format(unique_name_size, tracer_var_size, alive_cpp_var_size))
objgraph.show_growth(limit=limit)
else:
return unique_name_size, tracer_var_size, alive_cpp_var_size
def to_variable(value, block=None, name=None):
......
......@@ -20,7 +20,7 @@ from . import layers
from . import parallel_helper
from .. import framework
from ..layers import collective
from . import to_variable
from . import to_variable, no_grad
__all__ = ["prepare_context"]
......@@ -197,6 +197,7 @@ class DataParallel(layers.Layer):
for g_var, g_shape in zip(origin_grad_vars, grad_shapes):
nn.reshape(x=g_var, shape=g_shape, inplace=True)
@no_grad
def apply_collective_grads(self):
"""
AllReduce the Parameters' gradient.
......
......@@ -23,21 +23,15 @@ from paddle.fluid import framework
__all__ = ['Tracer']
def release_op(op):
del framework._dygraph_tracer()._ops[op._trace_id]
class Tracer(core.Tracer):
"""
Python wrapper of dygraph tracer
"""
def __init__(self, block):
super(Tracer, self).__init__(block)
def __init__(self):
super(Tracer, self).__init__()
self._ops = defaultdict()
self._vars = defaultdict()
self._trace_id = 0
self._train_mode = True
def trace_var(self, name, var):
......@@ -47,23 +41,10 @@ class Tracer(core.Tracer):
return list((item for name, item in six.iteritems(self._vars)
if isinstance(item, framework.Parameter)))
def _clear_ops(self):
self._ops = defaultdict()
self._trace_id = 0
def trace_op(self, op, inputs, outputs, stop_gradient=False):
# record op's trace id
op.iop._trace_id = self._trace_id
self.trace(op.iop, inputs, outputs, op.attrs,
framework._current_expected_place(), stop_gradient)
if not stop_gradient and self._train_mode:
self._trace_id += 1
self._ops[op.iop._trace_id] = op
# register backward hooks and variables if needed
op.iop.register_backward_hooks(release_op)
def trace_op(self, type, inputs, outputs, attrs, stop_gradient=False):
self.trace(type, inputs, outputs, attrs,
framework._current_expected_place(), self._train_mode and
not stop_gradient)
def train_mode(self):
self._train_mode = True
......
......@@ -458,9 +458,10 @@ class Variable(object):
self._ivar = kwargs.get("ivar", None)
if not self._ivar:
self._ivar = core.VarBase(
name, dtype if dtype else core.VarDesc.VarType.FP32,
list(shape) if shape else [],
_current_expected_place(), stop_gradient, True
name, type
if type else core.VarDesc.VarType.LOD_TENSOR, dtype
if dtype else core.VarDesc.VarType.FP32,
list(shape) if shape else [], stop_gradient, True
if persistable else False)
if persistable:
_dygraph_tracer().trace_var(name, self)
......@@ -582,13 +583,16 @@ class Variable(object):
return np.array(new_ivar.value().get_tensor())
def backward(self, backward_strategy=None):
from .dygraph import BackwardStrategy
if backward_strategy is None:
backward_strategy = BackwardStrategy()
backward_strategy.sort_sum_gradient = False
if in_dygraph_mode():
from .dygraph import BackwardStrategy
if backward_strategy is None:
backward_strategy = BackwardStrategy()
backward_strategy.sort_sum_gradient = False
self._ivar._run_backward(backward_strategy)
_dygraph_tracer()._clear_ops()
self._ivar._run_backward(backward_strategy, _dygraph_tracer())
else:
raise ValueError(
"Variable.backward() is only avaliable in DyGraph mode")
def gradient(self):
new_ivar = self._ivar._grad_ivar()._copy_to(core.CPUPlace(), True)
......@@ -616,9 +620,13 @@ class Variable(object):
"""
if in_dygraph_mode():
# TODO(panyx0718): add more dygraph debug info.
return 'name %s, dtype: %s shape: %s %s' % (
self.name, self.dtype, self.shape,
str(self._ivar.value().get_tensor()))
tensor = self._ivar.value().get_tensor()
if tensor._is_initialized():
return 'name %s, dtype: %s shape: %s %s' % (
self.name, self.dtype, self.shape, str(tensor))
else:
return 'name %s, shape: %s, not inited' % (self.name,
self.shape)
assert isinstance(throw_on_error, bool) and isinstance(with_details,
bool)
......@@ -713,7 +721,7 @@ class Variable(object):
@property
def type(self):
if in_dygraph_mode():
return self._ivar.dtype
return self._ivar.type
else:
return self.desc.type()
......@@ -1085,9 +1093,7 @@ class Operator(object):
if type is None:
raise ValueError(
"`type` to initialized an Operator can not be None.")
self.iop = core.OpBase(type)
self.previous_ops = []
self._type = type
self.attrs = attrs if attrs else {}
else:
self.block = block
......@@ -1233,7 +1239,7 @@ class Operator(object):
@property
def type(self):
if in_dygraph_mode():
return self.iop.type
return self._type
else:
return self.desc.type()
......@@ -1787,10 +1793,12 @@ class Block(object):
else:
attrs['is_test'] = False
type = kwargs.get("type", None)
op = Operator(
block=self,
desc=None,
type=kwargs.get("type", None),
type=type,
inputs=None,
outputs=None,
attrs=attrs)
......@@ -1799,9 +1807,11 @@ class Block(object):
#
# TODO(minqiyang): add op stop_gradient support in static mode too.
# currently, we only support stop_gradient in dygraph mode.
_dygraph_tracer().trace_op(op,
_dygraph_tracer().trace_op(type,
kwargs.get("inputs", {}),
kwargs.get("outputs", {}),
kwargs.get("outputs", {}), attrs
if attrs else {},
kwargs.get("stop_gradient", False))
else:
op_desc = self.desc.append_op()
......@@ -1862,17 +1872,15 @@ class Block(object):
def _prepend_op(self, *args, **kwargs):
if in_dygraph_mode():
type = kwargs.get("type", None)
attrs = kwargs.get("attrs", {})
op = Operator(
self,
None,
type=kwargs.get("type", None),
inputs=None,
outputs=None,
attrs=kwargs.get("attrs", {}))
self, None, type=type, inputs=None, outputs=None, attrs=attrs)
_dygraph_tracer().trace_op(op,
_dygraph_tracer().trace_op(type,
kwargs.get("inputs", {}),
kwargs.get("outputs", {}),
kwargs.get("outputs", {}), attrs
if attrs else {},
kwargs.get("stop_gradient", False))
else:
op_desc = self.desc._prepend_op()
......
......@@ -615,9 +615,6 @@ class Optimizer(object):
optimize_ops = self.apply_optimize(
loss, startup_program=startup_program, params_grads=params_grads)
if framework.in_dygraph_mode():
framework._dygraph_tracer()._clear_ops()
return optimize_ops, params_grads
......
......@@ -177,7 +177,7 @@ list(REMOVE_ITEM TEST_OPS test_basic_gru_api)
list(REMOVE_ITEM TEST_OPS test_basic_gru_unit_op)
list(REMOVE_ITEM TEST_OPS test_basic_lstm_api)
list(REMOVE_ITEM TEST_OPS test_basic_lstm_unit_op)
list(REMOVE_ITEM TEST_OPS test_imperative_debug_string)
# Some ops need to check results when gc is enabled
# Currently, only ops that register NoNeedBufferVarsInference need to do this test
set(TEST_OPS_WITH_GC
......@@ -240,6 +240,7 @@ py_test_modules(test_imperative_ocr_attention_model MODULES test_imperative_ocr_
py_test_modules(test_install_check MODULES test_install_check ENVS
FLAGS_cudnn_deterministic=1 SERIAL)
set_tests_properties(test_install_check PROPERTIES LABELS "RUN_TYPE=DIST")
py_test_modules(test_imperative_debug_string MODULES test_imperative_debug_string ENVS FLAGS_dygraph_debug=1)
if(WITH_DISTRIBUTE)
py_test_modules(test_dist_train MODULES test_dist_train ENVS ${dist_ENVS})
py_test_modules(test_lookup_remote_table_op MODULES test_lookup_remote_table_op ENVS ${dist_ENVS})
......
......@@ -27,17 +27,40 @@ import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph
from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, BatchNorm
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, LayerNorm
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.layer_helper import LayerHelper
import math
from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase
momentum_rate = 0.9
l2_decay = 1.2e-4
def optimizer_setting(params):
    """Build a Momentum optimizer with a cosine-decayed learning rate.

    Args:
        params (dict): training configuration. Reads ``"lr"``,
            ``"num_epochs"``, ``"learning_strategy"`` (a dict providing
            ``"batch_size"``) and, optionally, ``"total_images"``
            (defaults to 6149 when absent).

    Returns:
        fluid.optimizer.Momentum: optimizer using module-level
        ``momentum_rate`` and L2 regularization with ``l2_decay``.
    """
    ls = params["learning_strategy"]
    if "total_images" not in params:
        total_images = 6149
    else:
        total_images = params["total_images"]

    batch_size = ls["batch_size"]
    # Steps per epoch drive the cosine decay schedule.
    step = int(math.ceil(float(total_images) / batch_size))
    lr = params["lr"]
    num_epochs = params["num_epochs"]
    # NOTE(review): the original also computed piecewise boundaries
    # (step * e for e in ls["epochs"]) but never used them; removed as
    # dead code.
    optimizer = fluid.optimizer.Momentum(
        learning_rate=fluid.layers.cosine_decay(
            learning_rate=lr, step_each_epoch=step, epochs=num_epochs),
        momentum=momentum_rate,
        regularization=fluid.regularizer.L2Decay(l2_decay))

    return optimizer
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
name_scope,
num_channels,
num_filters,
filter_size,
stride=1,
......@@ -46,26 +69,21 @@ class ConvBNLayer(fluid.dygraph.Layer):
super(ConvBNLayer, self).__init__(name_scope)
self._conv = Conv2D(
self.full_name(),
"conv2d",
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
bias_attr=None)
bias_attr=False,
param_attr=fluid.ParamAttr(name="weights"))
self._batch_norm = BatchNorm(
self.full_name(), num_filters, act=act, momentum=0.1)
self._layer_norm = fluid.dygraph.nn.LayerNorm(
self.full_name(), begin_norm_axis=1)
self._layer_norm = LayerNorm(self.full_name(), begin_norm_axis=1)
def forward(self, inputs):
y = self._conv(inputs)
# FIXME(zcd): when compare the result of multi-card and single-card,
# we should replace batch_norm with layer_norm.
y = self._layer_norm(y)
# y = self._batch_norm(y)
return y
......@@ -76,17 +94,19 @@ class SqueezeExcitation(fluid.dygraph.Layer):
super(SqueezeExcitation, self).__init__(name_scope)
self._pool = Pool2D(
self.full_name(), pool_size=0, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(num_channels * 1.0)
self._squeeze = FC(
self.full_name(),
size=num_channels // reduction_ratio,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.05)),
initializer=fluid.initializer.Uniform(-stdv, stdv)),
act='relu')
stdv = 1.0 / math.sqrt(num_channels / 16.0 * 1.0)
self._excitation = FC(
self.full_name(),
size=num_channels,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.05)),
initializer=fluid.initializer.Uniform(-stdv, stdv)),
act='sigmoid')
def forward(self, input):
......@@ -110,39 +130,37 @@ class BottleneckBlock(fluid.dygraph.Layer):
self.conv0 = ConvBNLayer(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters,
filter_size=1)
filter_size=1,
act="relu")
self.conv1 = ConvBNLayer(
self.full_name(),
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
groups=cardinality)
groups=cardinality,
act="relu")
self.conv2 = ConvBNLayer(
self.full_name(),
num_channels=num_filters,
num_filters=num_filters * 4,
num_filters=num_filters * 2,
filter_size=1,
act='relu')
act=None)
self.scale = SqueezeExcitation(
self.full_name(),
num_channels=num_filters * 4,
num_channels=num_filters * 2,
reduction_ratio=reduction_ratio)
if not shortcut:
self.short = ConvBNLayer(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters * 4,
num_filters=num_filters * 2,
filter_size=1,
stride=stride)
self.shortcut = shortcut
self._num_channels_out = num_filters * 4
self._num_channels_out = num_filters * 2
def forward(self, inputs):
y = self.conv0(inputs)
......@@ -155,10 +173,7 @@ class BottleneckBlock(fluid.dygraph.Layer):
else:
short = self.short(inputs)
y = fluid.layers.elementwise_add(x=short, y=scale)
layer_helper = LayerHelper(self.full_name(), act='relu')
y = layer_helper.append_activation(y)
y = fluid.layers.elementwise_add(x=short, y=scale, act='relu')
return y
......@@ -178,7 +193,6 @@ class SeResNeXt(fluid.dygraph.Layer):
num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer(
self.full_name(),
num_channels=3,
num_filters=64,
filter_size=7,
stride=2,
......@@ -196,8 +210,7 @@ class SeResNeXt(fluid.dygraph.Layer):
num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer(
self.full_name(),
num_channels=3,
num_filters=3,
num_filters=64,
filter_size=7,
stride=2,
act='relu')
......@@ -214,24 +227,21 @@ class SeResNeXt(fluid.dygraph.Layer):
num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer(
self.full_name(),
num_channels=3,
num_filters=3,
filter_size=7,
num_filters=64,
filter_size=3,
stride=2,
act='relu')
self.conv1 = ConvBNLayer(
self.full_name(),
num_channels=64,
num_filters=3,
filter_size=7,
stride=2,
num_filters=64,
filter_size=3,
stride=1,
act='relu')
self.conv2 = ConvBNLayer(
self.full_name(),
num_channels=64,
num_filters=3,
filter_size=7,
stride=2,
num_filters=128,
filter_size=3,
stride=1,
act='relu')
self.pool = Pool2D(
self.full_name(),
......@@ -261,16 +271,14 @@ class SeResNeXt(fluid.dygraph.Layer):
self.pool2d_avg = Pool2D(
self.full_name(), pool_size=7, pool_type='avg', global_pooling=True)
import math
stdv = 1.0 / math.sqrt(2048 * 1.0)
self.fc = FC(self.full_name(),
size=class_dim,
act='softmax',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)))
self.out = FC(self.full_name(),
size=class_dim,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)))
def forward(self, inputs, label):
def forward(self, inputs):
if self.layers == 50 or self.layers == 101:
y = self.conv0(inputs)
y = self.pool(y)
......@@ -283,13 +291,8 @@ class SeResNeXt(fluid.dygraph.Layer):
for bottleneck_block in self.bottleneck_block_list:
y = bottleneck_block(y)
y = self.pool2d_avg(y)
# FIXME(zcd): the dropout should be removed when compare the
# result of multi-card and single-card.
# y = fluid.layers.dropout(y, dropout_prob=0.2, seed=1)
cost = self.fc(y)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
return avg_loss
y = self.out(y)
return y
class TestSeResNeXt(TestParallelDyGraphRunnerBase):
......@@ -312,8 +315,11 @@ class TestSeResNeXt(TestParallelDyGraphRunnerBase):
label = to_variable(y_data)
label.stop_gradient = True
loss = model(img, label)
return loss
out = model(img)
softmax_out = fluid.layers.softmax(out, use_cudnn=False)
loss = fluid.layers.cross_entropy(input=softmax_out, label=label)
avg_loss = fluid.layers.mean(x=loss)
return avg_loss
if __name__ == "__main__":
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import paddle.fluid as fluid
import numpy as np
class MLP(fluid.Layer):
    """Small two-FC-layer network used as a dygraph test fixture.

    Both layers are initialized with constant 0.1 weights/biases so the
    forward result is deterministic.
    """

    def __init__(self, name_scope):
        super(MLP, self).__init__(name_scope)

        def _const_attr():
            # Fresh attr per parameter so layers do not share one object.
            return fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1))

        self._fc1 = fluid.dygraph.FC(
            self.full_name(),
            3,
            param_attr=_const_attr(),
            bias_attr=_const_attr())
        self._fc2 = fluid.dygraph.FC(
            self.full_name(),
            4,
            param_attr=_const_attr(),
            bias_attr=_const_attr())

    def forward(self, inputs):
        """Run both FC layers and reduce the output to a scalar."""
        hidden = self._fc2(self._fc1(inputs))
        return fluid.layers.reduce_sum(hidden)
class TestDygraphDebugString(unittest.TestCase):
    """Exercises fluid.dygraph.base._print_debug_msg (run with
    FLAGS_dygraph_debug=1 — see the test's CMake registration)."""

    def test_dygraph_debug_string(self):
        # Fixed 2x2 input fed repeatedly through a small MLP.
        np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
        # Baselines captured on the first loop iteration (i == 0).
        unique_name = 0
        trace_var = 0
        alive_var = 0
        with fluid.dygraph.guard():
            mlp = MLP("mlp")
            for i in range(10):
                var_inp = fluid.dygraph.base.to_variable(np_inp)
                out = mlp(var_inp)
                out.backward()
                mlp.clear_gradients()
                # is_test=True makes _print_debug_msg return the counters
                # instead of logging/printing them.
                unique_name_tmp, trace_var_tmp, alive_var_tmp = fluid.dygraph.base._print_debug_msg(
                    is_test=True)
                if i > 0:
                    # Counters must not exceed the first-iteration baseline,
                    # i.e. repeated forward/backward passes do not leak
                    # names or variables.
                    self.assertGreaterEqual(unique_name, unique_name_tmp)
                    self.assertGreaterEqual(trace_var, trace_var_tmp)
                    self.assertGreaterEqual(alive_var, alive_var_tmp)
                else:
                    unique_name = unique_name_tmp
                    trace_var = trace_var_tmp
                    alive_var = alive_var_tmp
            try:
                # The logging path (default is_test=False) must not raise.
                fluid.dygraph.base._print_debug_msg()
            except Exception as e:
                raise RuntimeError(
                    "No Exception is accepted in _print_debug_msg, but we got: {}".
                    format(e))
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import paddle.fluid as fluid
import numpy as np
from test_imperative_base import new_program_scope
class MLP(fluid.Layer):
    """Two fully-connected layers with constant 0.1 initialization,
    reduced to a scalar — a deterministic dygraph test fixture."""

    def __init__(self, name_scope):
        super(MLP, self).__init__(name_scope)

        def _make_attr():
            # One ParamAttr per parameter; all constant-initialized.
            return fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1))

        self._fc1 = fluid.dygraph.FC(
            self.full_name(),
            3,
            param_attr=_make_attr(),
            bias_attr=_make_attr())
        self._fc2 = fluid.dygraph.FC(
            self.full_name(),
            4,
            param_attr=_make_attr(),
            bias_attr=_make_attr())

    def forward(self, inputs):
        """Chain fc1 -> fc2 and sum all elements into a scalar."""
        out = self._fc1(inputs)
        out = self._fc2(out)
        return fluid.layers.reduce_sum(out)
class TestDygraphFramework(unittest.TestCase):
    """Checks dygraph-only Variable APIs guard against static-graph misuse."""

    def test_dygraph_backward(self):
        """backward() must raise ValueError outside dygraph mode."""
        with new_program_scope():
            mlp = MLP("mlp")
            var_inp = fluid.layers.data(
                "input", shape=[2, 2], dtype="float32", append_batch_size=False)
            out = mlp(var_inp)
            # assertRaises both verifies the exception type and fails the
            # test when no exception is raised — unlike the previous
            # try/except whose assertTrue(e is not None) was always true.
            with self.assertRaises(ValueError):
                out.backward()

    def test_dygraph_to_string(self):
        """to_string() should succeed on an initialized dygraph variable."""
        np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
        with fluid.dygraph.guard():
            var_inp = fluid.dygraph.base.to_variable(np_inp)
            var_inp.to_string(throw_on_error=True)
......@@ -16,3 +16,4 @@ funcsigs
pyyaml
decorator
prettytable
objgraph
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册