提交 d4d215a5 编写于 作者: D Dong Zhihong

Merge remote-tracking branch 'origin/develop' into multigpu

......@@ -243,7 +243,7 @@ class SymbolTable {
// TODO determine whether name is generated by python or C++.
// Currently assume that a unique name will be generated by C++ if the
// argument name is left default.
VarDesc* NewVar(const string& name="");
VarDesc* Var(const string& name="");
// find a VarDesc by name, if recursive is true, find parent's SymbolTable
// recursively.
......
......@@ -37,7 +37,7 @@ Scope is an association of a name to variable. All variables belong to `Scope`.
```cpp
class Scope {
public:
Variable* NewVar(const std::string& name);
Variable* Var(const std::string& name);
const Variable* FindVar(const std::string& name) const;
private:
......@@ -98,7 +98,7 @@ class Scope {
Variable* FindVar(const std::string& name) const;
// return if already contains same name variable.
Variable* NewVar(const std::string& name);
Variable* Var(const std::string& name);
private:
std::shared_ptr<Scope> parent_;
......@@ -107,7 +107,7 @@ class Scope {
```
## Only scope can create a variable
To ensure `only scope can create a variable`, we should mark `Variable`'s constructor as a private member function, and Scope is a friend class of Variable. And then only `NewVar` can construct `Variable`.
To ensure `only scope can create a variable`, we should mark `Variable`'s constructor as a private member function, and Scope is a friend class of Variable. And then only `Var` can construct `Variable`.
## When scope destroyed, all variables inside this scope should be destroyed together
......@@ -121,4 +121,4 @@ Also, as the parent scope is a `shared_ptr`, we can only `Create()` a scope shar
## Orthogonal interface
`FindVar` will return `nullptr` when `name` is not found. It can be used as `Contains` method. `NewVar` will return an `Error` when there is a name conflict locally. Combine `FindVar` and `NewVar`, we can implement `NewVar` easily.
`FindVar` will return `nullptr` when `name` is not found. It can be used as `Contains` method. `Var` will return an `Error` when there is a name conflict locally. Combine `FindVar` and `Var`, we can implement `Var` easily.
......@@ -161,7 +161,7 @@ class TensorArray:
@name: str
the name of the variable to output.
'''
tensor = NewVar(name)
tensor = Var(name)
tensor_array_stack(self.name, tensor)
return tensor
......
......@@ -273,18 +273,41 @@ static bool AllGradInSet(const std::vector<std::string>& names,
return true;
}
static void CreateGradVarInBlock(BlockDescBind* block_desc,
size_t grad_op_start_index) {
static void CreateGradVarInBlock(
size_t grad_op_start_index,
const std::unordered_map<std::string, std::string>& param_name_map,
BlockDescBind* block_desc,
std::unordered_map<std::string, GradVarInfo>* grad_var_record) {
auto ops = block_desc->AllOps();
for (size_t op_index = grad_op_start_index; op_index < ops.size();
++op_index) {
for (const auto& output : ops[op_index]->Outputs()) {
for (const auto& real_output : output.second) {
if (!block_desc->HasVar(real_output)) {
block_desc->NewVar(real_output);
}
}
}
// <<<<<<< HEAD
// for (const auto& output : ops[op_index]->Outputs()) {
// for (const auto& real_output : output.second) {
// if (!block_desc->HasVar(real_output)) {
// block_desc->Var(real_output);
// }
// }
// }
// =======
ForEachVarName(ops[op_index]->Outputs(),
[&](const std::string& grad_var_name) {
if (block_desc->HasVar(grad_var_name)) {
return false;
}
block_desc->Var(grad_var_name);
auto it = param_name_map.find(grad_var_name);
if (it == param_name_map.end()) {
return false;
}
auto param_var_name = it->second;
auto& grad_record = (*grad_var_record)[param_var_name];
grad_record.name_ = grad_var_name;
grad_record.block_idx_ = block_desc->ID();
grad_record.op_idx_ = static_cast<int>(op_index);
return false; /* not break */
});
// >>>>>>> origin/develop
}
}
......@@ -400,8 +423,9 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
return backward_descs;
}
void AppendBackward(ProgramDescBind& program_desc, const VarDescBind& target,
const std::unordered_set<std::string>& no_grad_vars) {
ParamGradInfoMap AppendBackward(
ProgramDescBind& program_desc, const VarDescBind& target,
const std::unordered_set<std::string>& no_grad_vars) {
std::unordered_set<std::string> no_grad_var_names;
no_grad_var_names.reserve(no_grad_vars.size() + 1);
no_grad_var_names.insert(std::string(kEmptyVarName) + kGradVarSuffix);
......@@ -423,20 +447,28 @@ void AppendBackward(ProgramDescBind& program_desc, const VarDescBind& target,
all_ops.push_back(std::move(fill_one_op));
size_t forward_op_num = all_ops.size();
size_t forward_block_num = program_desc.Size();
// Insert backward operators
std::unordered_map<std::string, std::string> grad_to_var;
auto backward_op_descs = MakeBlockBackward(program_desc, root_block_idx,
&no_grad_var_names, &grad_to_var);
std::unordered_map<std::string, GradVarInfo> retv;
// Create Variable
for (auto& ptr : backward_op_descs) {
all_ops.push_back(std::move(ptr));
}
root_block->NewVar(fill_one_op_out);
root_block->Var(fill_one_op_out);
// create grad_var for all blocks in this program
CreateGradVarInBlock(root_block, forward_op_num);
CreateGradVarInBlock(forward_op_num, grad_to_var, root_block, &retv);
for (size_t block_index = forward_block_num;
block_index < program_desc.Size(); ++block_index) {
CreateGradVarInBlock(program_desc.Block(block_index), 0);
CreateGradVarInBlock(0, grad_to_var, program_desc.Block(block_index),
&retv);
}
return retv;
}
} // namespace framework
......
......@@ -14,7 +14,10 @@
#pragma once
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "paddle/framework/operator.h"
#include "paddle/framework/program_desc.h"
......@@ -27,10 +30,18 @@ extern std::unique_ptr<OperatorBase> Backward(
const OperatorBase& forwardOp,
const std::unordered_set<std::string>& no_grad_vars);
// TODO(jiayi): Add target as parameter and generate backward op
// according to target.
void AppendBackward(ProgramDescBind& program_desc, const VarDescBind& target,
const std::unordered_set<std::string>& no_grad_vars);
struct GradVarInfo {
std::string name_;
int block_idx_;
int op_idx_;
};
using ParamGradInfoMap = std::unordered_map<std::string /*fwd_var_name*/,
GradVarInfo /*grad_var_info*/>;
ParamGradInfoMap AppendBackward(
ProgramDescBind& program_desc, const VarDescBind& target,
const std::unordered_set<std::string>& no_grad_vars);
} // namespace framework
} // namespace paddle
......@@ -18,19 +18,22 @@ limitations under the License. */
namespace paddle {
namespace framework {
VarDescBind *BlockDescBind::NewVar(const std::string &name) {
VarDescBind *BlockDescBind::Var(const std::string &name) {
need_update_ = true;
auto it = vars_.find(name);
PADDLE_ENFORCE(it == vars_.end(), "Duplicated variable %s", name);
auto var = new VarDescBind(name);
if (it != vars_.end()) {
return it->second.get();
}
auto *var = new VarDescBind(name);
vars_[name].reset(var);
return var;
}
VarDescBind *BlockDescBind::Var(const std::string &name) const {
VarDescBind *BlockDescBind::FindVar(const std::string &name) const {
auto it = vars_.find(name);
PADDLE_ENFORCE(it != vars_.end(),
"Can not find variable %s in current block.", name);
if (it == vars_.end()) {
return nullptr;
}
return it->second.get();
}
......
......@@ -33,15 +33,6 @@ class ProgramDescBind;
class BlockDescBind {
public:
friend std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
ProgramDescBind &program_desc, int block_idx,
std::unordered_set<std::string> *no_grad_vars,
std::unordered_map<std::string, std::string> *grad_to_var);
friend void AppendBackward(
ProgramDescBind &program_desc, const VarDescBind &target,
const std::unordered_set<std::string> &no_grad_vars);
BlockDescBind(ProgramDescBind *prog, BlockDesc *desc)
: prog_(prog), desc_(desc), need_update_(false) {}
......@@ -49,9 +40,9 @@ class BlockDescBind {
int32_t Parent() const { return desc_->parent_idx(); }
VarDescBind *NewVar(const std::string &name_bytes);
VarDescBind *Var(const std::string &name_bytes);
VarDescBind *Var(const std::string &name_bytes) const;
VarDescBind *FindVar(const std::string &name_bytes) const;
bool HasVar(const std::string &var_name) const;
......@@ -69,7 +60,9 @@ class BlockDescBind {
BlockDesc *Proto();
private:
// FIXME(yuyang18): backward will access private data of BlockDesc.
// Mark it public temporary. We can fix it later.
public:
ProgramDescBind *prog_; // not_own
BlockDesc *desc_; // not_own
bool need_update_;
......
......@@ -66,7 +66,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id) {
// Instantiate all the vars in the global scope
for (auto& var : block.vars()) {
scope->NewVar(var.name());
scope->Var(var.name());
}
Scope& local_scope = scope->NewScope();
......@@ -78,7 +78,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id) {
for (auto& var : block.ops(i).outputs()) {
for (auto& argu : var.arguments()) {
if (local_scope.FindVar(argu) == nullptr) {
local_scope.NewVar(argu);
local_scope.Var(argu);
}
}
}
......
......@@ -46,10 +46,16 @@ void AddOp(const std::string& type, const VariableNameMap& inputs,
// insert output
for (auto kv : outputs) {
for (auto v : kv.second) {
// <<<<<<< HEAD
// auto var = block->Var(v);
// var->SetType(VarDesc::LOD_TENSOR);
// var->SetDataType(paddle::framework::DataType::FP32);
// =======
if (!block->HasVar(v)) {
auto var = block->NewVar(v);
auto var = block->Var(v);
var->SetDataType(paddle::framework::DataType::FP32);
}
// >>>>>>> origin/develop
}
}
......
......@@ -403,11 +403,11 @@ class CompileTimeInferShapeContext : public InferShapeContext {
private:
DDim GetDim(const std::string& name) const override {
return framework::make_ddim(block_.Var(name)->Shape());
return framework::make_ddim(block_.FindVar(name)->Shape());
}
void SetDim(const std::string& name, const DDim& dim) override {
block_.Var(name)->SetShape(framework::vectorize(dim));
block_.FindVar(name)->SetShape(framework::vectorize(dim));
}
const OpDescBind& op_;
......
......@@ -84,7 +84,7 @@ TEST(OperatorBase, all) {
paddle::framework::Scope scope;
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
scope.NewVar("OUT1");
scope.Var("OUT1");
ASSERT_EQ(paddle::framework::op_run_num, 0);
op->Run(scope, device_context);
ASSERT_EQ(paddle::framework::op_run_num, 1);
......@@ -237,12 +237,12 @@ TEST(OpKernel, multi_inputs) {
paddle::platform::CPUDeviceContext cpu_device_context;
paddle::framework::Scope scope;
scope.NewVar("x0")->GetMutable<Tensor>();
scope.NewVar("x1")->GetMutable<Tensor>();
scope.NewVar("x2")->GetMutable<Tensor>();
scope.NewVar("k0")->GetMutable<Tensor>();
scope.NewVar("y0")->GetMutable<Tensor>();
scope.NewVar("y1")->GetMutable<Tensor>();
scope.Var("x0")->GetMutable<Tensor>();
scope.Var("x1")->GetMutable<Tensor>();
scope.Var("x2")->GetMutable<Tensor>();
scope.Var("k0")->GetMutable<Tensor>();
scope.Var("y0")->GetMutable<Tensor>();
scope.Var("y1")->GetMutable<Tensor>();
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
op->Run(scope, cpu_device_context);
......
......@@ -31,7 +31,7 @@ Scope& Scope::NewScope() const {
return *kids_.back();
}
Variable* Scope::NewVar(const std::string& name) {
Variable* Scope::Var(const std::string& name) {
auto iter = vars_.find(name);
if (iter != vars_.end()) {
return iter->second;
......@@ -42,8 +42,8 @@ Variable* Scope::NewVar(const std::string& name) {
return v;
}
Variable* Scope::NewVar() {
return NewVar(string::Sprintf("%p.%d", this, vars_.size()));
Variable* Scope::Var() {
return Var(string::Sprintf("%p.%d", this, vars_.size()));
}
Variable* Scope::FindVar(const std::string& name) const {
......@@ -71,8 +71,8 @@ framework::Scope& GetGlobalScope() {
static std::unique_ptr<framework::Scope> g_scope{nullptr};
std::call_once(feed_variable_flag, [&]() {
g_scope.reset(new framework::Scope());
g_scope->NewVar("feed_value");
g_scope->NewVar("fetch_value");
g_scope->Var("feed_value");
g_scope->Var("fetch_value");
});
return *(g_scope.get());
}
......
......@@ -45,10 +45,10 @@ class Scope {
Scope& NewScope() const;
/// Create a variable with given name if it doesn't exist.
Variable* NewVar(const std::string& name);
Variable* Var(const std::string& name);
/// Create a variable with a scope-unique name.
Variable* NewVar();
Variable* Var();
/// Find a variable in the scope or any of its ancestors. Returns
/// nullptr if cannot find.
......
......@@ -23,8 +23,8 @@ TEST(Scope, VarsShadowing) {
Scope& ss1 = s.NewScope();
Scope& ss2 = s.NewScope();
Variable* v0 = s.NewVar("a");
Variable* v1 = ss1.NewVar("a");
Variable* v0 = s.Var("a");
Variable* v1 = ss1.Var("a");
EXPECT_NE(v0, v1);
......@@ -40,7 +40,7 @@ TEST(Scope, FindVar) {
EXPECT_EQ(nullptr, s.FindVar("a"));
EXPECT_EQ(nullptr, ss.FindVar("a"));
ss.NewVar("a");
ss.Var("a");
EXPECT_EQ(nullptr, s.FindVar("a"));
EXPECT_NE(nullptr, ss.FindVar("a"));
......@@ -49,7 +49,7 @@ TEST(Scope, FindVar) {
TEST(Scope, FindScope) {
Scope s;
Scope& ss = s.NewScope();
Variable* v = s.NewVar("a");
Variable* v = s.Var("a");
EXPECT_EQ(&s, s.FindScope(v));
EXPECT_EQ(&s, ss.FindScope(v));
......
......@@ -86,6 +86,7 @@ protected:
/// Also used in 'use_mkldnn' case.
std::vector<Argument> outputOtherDevice_;
/// If there are several outputs, map them by each name.
/// MKLDNNLayer use it only to merge output grad
std::map<std::string, Argument*> outputMap_;
/// Used to merge grad on different devices.
MatrixPtr tmpGrad_;
......@@ -325,6 +326,11 @@ public:
outputMap_[name] = output;
}
/**
* Get the output map size, if layer has multi-output.
*/
size_t getOutputMapSize() { return outputMap_.size(); }
/**
* Get the output based on layer's name.
*/
......
......@@ -225,8 +225,6 @@ void MKLDNNConvLayer::resetFwdPipeline(
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
pipeline.clear();
if (cvtInVal_) {
pipeline.push_back(*cvtInVal_);
}
......@@ -245,7 +243,7 @@ void MKLDNNConvLayer::resetFwdPipeline(
void MKLDNNConvLayer::resetInValue(
std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& in) {
const MatrixPtr& inMat = inputLayers_[0]->getOutput().value;
const MatrixPtr& inMat = inputLayers_[0]->getOutputValue();
in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc());
// create buffer and reorder if input value do not match
......@@ -310,15 +308,20 @@ void MKLDNNConvLayer::resetOutValue(
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
if (cpuOutVal_->getPrimitiveDesc() != pd->dst_primitive_desc()) {
out = MKLDNNMatrix::create(nullptr, pd->dst_primitive_desc());
cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
CHECK(cvtOutVal_) << "should not be emptry";
CHECK(cvtOutVal_) << "should not be empty";
} else {
// CPU output share the same data of MKLDNN output
cpuOut->setData(out->getData());
cpuOutVal_ = out;
}
// when output is cpu device, change the mkldnn output value and make them
// share the same data. Then if next layer use inputlayer->getOuputValue()
// to achieve the input value, it will get the right data.
output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
return;
}
output_.value = std::dynamic_pointer_cast<Matrix>(out);
}
void MKLDNNConvLayer::resetBwdWgtPD(
......@@ -412,8 +415,6 @@ void MKLDNNConvLayer::resetBwdPipeline(
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
pipeline.clear();
if (cvtOutGrad_) {
pipeline.push_back(*cvtOutGrad_);
}
......@@ -446,28 +447,27 @@ void MKLDNNConvLayer::resetBwdPipeline(
void MKLDNNConvLayer::resetOutGrad(
std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD, MKLDNNMatrixPtr& out) {
const MatrixPtr& outMat = output_.grad;
out = MKLDNNMatrix::create(outMat, wgtPD->diff_dst_primitive_desc());
CHECK(outVal_ != nullptr &&
out->getPrimitiveDesc() == outVal_->getPrimitiveDesc())
<< "primitive desc of out grad and value should be equal";
// TODO(TJ): merge outgrad
// create reorder if has output grad does not match
cpuOutGrad_ = nullptr;
cvtOutGrad_ = nullptr;
if (!outputIsOnlyMKLDNN()) {
CHECK(outVal_ != nullptr &&
outVal_->getPrimitiveDesc() == wgtPD->diff_dst_primitive_desc())
<< "primitive desc of out grad and value should be equal";
if (outputIsOnlyMKLDNN()) {
MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
outMat->setData(cpuOut->getData());
// same PrimitiveDesc with cpuInVal_
CHECK(cpuOutVal_);
cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc());
if (cpuOutGrad_->getPrimitiveDesc() == out->getPrimitiveDesc()) {
out = cpuOutGrad_;
} else {
out = MKLDNNMatrix::create(nullptr, wgtPD->diff_dst_primitive_desc());
// create reorder if primitive desc does not match
if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
CHECK(cvtOutGrad_);
} else {
// share the same data of CPU output
output_.grad->setData(cpuOut->getData());
out = cpuOutGrad_;
}
}
}
......@@ -496,32 +496,30 @@ void MKLDNNConvLayer::resetWgtBiasGrad(
void MKLDNNConvLayer::resetInGrad(
std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
MKLDNNMatrixPtr& in) {
in = nullptr;
cpuInGrad_ = nullptr;
cvtInGrad_ = nullptr;
if (dataPD == nullptr) {
return;
}
// TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
in = MKLDNNMatrix::create(inputLayers_[0]->getOutput().grad,
dataPD->diff_src_primitive_desc());
CHECK(nullptr != inVal_ &&
in->getPrimitiveDesc() == inVal_->getPrimitiveDesc())
<< "primitive desc of input grad and value should be equal";
// create reorder if has output grad does not match
cpuInGrad_ = nullptr;
cvtInGrad_ = nullptr;
if (!inputIsOnlyMKLDNN()) {
if (inputIsOnlyMKLDNN()) {
MKLDNNLayer::resetInGrad(in, dataPD->diff_src_primitive_desc());
CHECK(nullptr != inVal_ &&
in->getPrimitiveDesc() == inVal_->getPrimitiveDesc())
<< "primitive desc of input grad and value should be equal";
} else {
const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE);
// same PrimitiveDesc with cpuInVal_
CHECK(cpuInVal_);
cpuInGrad_ = MKLDNNMatrix::create(cpuIn, cpuInVal_->getPrimitiveDesc());
if (cpuInGrad_->getPrimitiveDesc() != in->getPrimitiveDesc()) {
const MatrixPtr& dnnIn = getInputGrad(0, MKLDNN_DEVICE);
in = MKLDNNMatrix::create(dnnIn, in->getPrimitiveDesc());
in = cpuInGrad_;
// create reorder if PrimitiveDesc does not match
if (cpuInGrad_->getPrimitiveDesc() != dataPD->diff_src_primitive_desc()) {
in = MKLDNNMatrix::create(getInputGrad(0, MKLDNN_DEVICE),
dataPD->diff_src_primitive_desc());
cvtInGrad_ = MKLDNNMatrix::createReorder(in, cpuInGrad_);
CHECK(cvtInGrad_);
} else {
in = cpuInGrad_;
}
}
}
......
......@@ -180,10 +180,10 @@ void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt,
void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) {
out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_);
if (!outputIsOnlyMKLDNN()) {
// fc cpu output value do not need create convert
// just share point
// fc cpu output value do not need create convert, just share data
getOutput(CPU_DEVICE).value->setData(out->getData());
}
output_.value = std::dynamic_pointer_cast<Matrix>(out);
}
void MKLDNNFcLayer::resetFwdPD(std::shared_ptr<fc_fwd::primitive_desc>& pd,
......@@ -214,8 +214,6 @@ void MKLDNNFcLayer::resetFwdPipeline(
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
pipeline.clear();
if (bias) {
fwd_.reset(new fc_fwd(*pd, *in, *wgt, *bias, *out));
} else {
......@@ -237,19 +235,14 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
}
void MKLDNNFcLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
// TODO(TJ): merge outgrad
int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE;
output_.grad->setData(getOutput(device).grad->getData());
// for MKLDNN device:
// can not directly cast outputgrad to mkldnnmatrix,
// since each layer can not write the inputgrad to mkldnn inputgrad.
// So just create from matrix with outputvalue format.
// for CPU device:
// fc do not need to convert from cpu device since output is always nc format
// only need create from cpu device
CHECK(outVal_);
out =
MKLDNNMatrix::create(getOutput(device).grad, outVal_->getPrimitiveDesc());
if (outputIsOnlyMKLDNN()) {
MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
output_.grad->setData(cpuOut->getData());
out = MKLDNNMatrix::create(cpuOut, outVal_->getPrimitiveDesc());
}
}
void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt,
......@@ -267,13 +260,11 @@ void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt,
void MKLDNNFcLayer::resetInGrad(MKLDNNMatrixPtr& in) {
in = nullptr;
const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad;
if (inGrad == nullptr) {
if (inputLayers_[0]->getOutput().grad == nullptr) {
return;
}
// TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
CHECK(inVal_);
in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc());
MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
}
void MKLDNNFcLayer::resetBwdWgtPD(
......@@ -314,7 +305,6 @@ void MKLDNNFcLayer::resetBwdPipeline(
MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) {
pipeline.clear();
CHECK(inVal_);
if (bias) {
bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias));
......
......@@ -65,6 +65,17 @@ protected:
MKLDNNMatrixPtr biasVal_;
MKLDNNMatrixPtr biasGrad_;
// merge grad primitive
std::shared_ptr<mkldnn::primitive> mergeGrad_;
std::vector<mkldnn::primitive> pipelineMergeGrad_;
// tmp input argument to save input grad, only used to merge grad
Argument tmpInArg_;
// since mkldnn sum do not support different formats:
// can refer to https://github.com/01org/mkl-dnn/issues/134
// so need create reorder manually and save tmp MKLDNNMatrix
MKLDNNMatrixPtr tmpOutGrad_;
std::shared_ptr<mkldnn::primitive> tmpCvt_;
public:
explicit MKLDNNLayer(const LayerConfig& config)
: Layer(config),
......@@ -99,6 +110,7 @@ public:
if (!Layer::init(layerMap, parameterMap)) {
return false;
}
setOutputMap();
checkCPUOutputsNumber();
stream_.reset(new MKLDNNStream());
......@@ -118,12 +130,9 @@ public:
VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward";
// reset when input total sizes changed, not only the batchsize
inputElemenCnt_ = elemenCnt;
pipelineFwd_.clear();
reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_);
if (outVal_) {
// change original output value to mkldnn output value
output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
}
convertWeightsFromPaddle();
needResetBwd_ = true;
}
......@@ -144,9 +153,18 @@ public:
void backward(const UpdateCallback& callback) override {
if (needResetBwd_) {
VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward";
pipelineBwd_.clear();
pipelineMergeGrad_.clear();
mergeGrad_ = nullptr;
resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_);
needResetBwd_ = false;
}
// merge grad must before backward activation
if (mergeGrad_) {
REGISTER_TIMER_INFO("MergeBpGrad", getName().c_str());
stream_->submit(pipelineMergeGrad_);
}
{
REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
backwardActivation();
......@@ -247,6 +265,76 @@ protected:
}
}
/**
* reset the output grad matrix from primitive desc.
* and reset the merge grad primitive if needed.
* note: when this layer has serval outputs,
* it could not be mixed with cpu device,
* since it can not get memory desc from cpu device.
*/
virtual void resetOutGrad(MKLDNNMatrixPtr& out,
mkldnn::memory::primitive_desc pd) {
CHECK(outputIsOnlyMKLDNN()) << "do not support mixed with other device yet";
mergeGrad_ = nullptr;
pipelineMergeGrad_.clear();
out = MKLDNNMatrix::create(output_.grad, pd);
if (outputMap_.size() <= 1) {
return;
}
std::vector<double> scales(outputMap_.size(), 1.0);
std::vector<mkldnn::memory::primitive_desc> srcPDs;
std::vector<mkldnn::primitive::at> srcs;
for (auto it = outputMap_.begin(); it != outputMap_.end(); ++it) {
MKLDNNMatrixPtr src =
std::dynamic_pointer_cast<MKLDNNMatrix>(it->second->grad);
VLOG(MKLDNN_BASE) << getName() << " has output grad " << it->first;
CHECK(src) << "should be MKLDNNMatrix";
auto srcDims = src->getDims();
auto dstDims = out->getDims();
CHECK_EQ(srcDims.size(), dstDims.size());
for (size_t i = 0; i < srcDims.size(); ++i) {
CHECK_EQ(srcDims[i], dstDims[i]);
}
srcPDs.push_back(src->getPrimitiveDesc());
srcs.push_back(*src);
}
// TODO(TJ): remove me when mkldnn sum support different formats
for (size_t i = 1; i < srcPDs.size(); ++i) {
CHECK(srcPDs[0] == srcPDs[i]);
}
tmpOutGrad_ = nullptr;
tmpCvt_ = nullptr;
if (out->getPrimitiveDesc() != srcPDs[0]) {
tmpOutGrad_ = MKLDNNMatrix::create(nullptr, srcPDs[0]);
tmpCvt_ = MKLDNNMatrix::createReorder(tmpOutGrad_, out);
CHECK(tmpCvt_);
pipelineMergeGrad_.push_back(*tmpCvt_);
} else {
tmpOutGrad_ = out;
}
auto sumPD = mkldnn::sum::primitive_desc(
tmpOutGrad_->getMemoryDesc(), scales, srcPDs);
mergeGrad_.reset(new mkldnn::sum(sumPD, srcs, *tmpOutGrad_));
pipelineMergeGrad_.insert(pipelineMergeGrad_.begin(), *mergeGrad_);
}
/**
* reset input grad from primitive desc.
* this function is avaiable for input is only mkldnn
* or input do not care cpu device
*/
virtual void resetInGrad(MKLDNNMatrixPtr& in,
mkldnn::memory::primitive_desc pd) {
LayerPtr& input = inputLayers_[0];
const MatrixPtr& grad =
input->getOutputMapSize() > 1 ? nullptr : input->getOutput().grad;
in = MKLDNNMatrix::create(grad, pd);
Argument& arg = input->getOutput(this->getName());
arg.grad = std::dynamic_pointer_cast<Matrix>(in);
}
/**
* print info about sizes
*/
......@@ -334,6 +422,16 @@ private:
}
}
/**
* Set output map of prev layers.
*/
void setOutputMap() {
outputMap_.clear();
for (size_t i = 0; i < inputLayers_.size(); ++i) {
inputLayers_[i]->setOutput(getName(), &tmpInArg_);
}
}
/**
* Check the cpu device number of outputOtherDevice_.
* should have only one at most.
......
......@@ -142,14 +142,16 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
out = MKLDNNMatrix::create(nullptr, out->getPrimitiveDesc());
cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
CHECK(cvtOutVal_) << "should not be emptry";
} else {
// CPU output share the same data of MKLDNN output
cpuOut->setData(out->getData());
cpuOutVal_ = out;
}
output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
return;
}
output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
}
void MKLDNNPoolLayer::resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd,
......@@ -187,7 +189,6 @@ void MKLDNNPoolLayer::resetFwdPipeline(
std::shared_ptr<pool_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out) {
pipeline.clear();
fwd_ = workspace_
? std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out, *workspace_))
: std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out));
......@@ -205,17 +206,17 @@ void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
resetInGrad(in);
}
void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
CHECK(outVal_) << "Should have output value";
out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
// create reorder if output value has cpu device and pd do not match
cpuOutGrad_ = nullptr;
cvtOutGrad_ = nullptr;
if (!outputIsOnlyMKLDNN()) {
CHECK(outVal_);
if (outputIsOnlyMKLDNN()) {
MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
cpuOutGrad_ = MKLDNNMatrix::create(
cpuOut, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_);
if (cpuOutGrad_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
CHECK(cvtOutGrad_) << "should not be emptry";
} else {
......@@ -228,12 +229,11 @@ void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
void MKLDNNPoolLayer::resetInGrad(MKLDNNMatrixPtr& in) {
in = nullptr;
const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad;
if (inGrad == nullptr) {
if (inputLayers_[0]->getOutput().grad == nullptr) {
return;
}
CHECK(inVal_);
in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc());
MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
}
void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
......@@ -261,7 +261,6 @@ void MKLDNNPoolLayer::resetBwdPipeline(
std::shared_ptr<pool_bwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out) {
pipeline.clear();
if (cvtOutGrad_) {
pipeline.push_back(*cvtOutGrad_);
}
......
......@@ -124,8 +124,8 @@ void MKLDNNTester::randomTopDiffs() {
void MKLDNNTester::checkForward() {
VLOG(MKLDNN_ALL) << "Check Forward";
printTopDatas();
double delta = compareMatrix(dnnLayer_->getOutput(CPU_DEVICE).value,
refLayer_->getOutputValue());
double delta =
compareMatrix(dnnLayer_->getOutputValue(), refLayer_->getOutputValue());
EXPECT_LE(fabs(delta), eps_);
}
......
......@@ -134,7 +134,7 @@ void CondOp::PrepareDataForSubnet(
for (int i = 0; i < BRANCH_NUM; ++i) {
for (auto& output : (*sub_net_op_[i]).Outputs()) {
for (auto& var_name : output.second) {
sub_scopes[i]->NewVar(var_name);
sub_scopes[i]->Var(var_name);
}
}
}
......
......@@ -30,7 +30,7 @@ namespace detail {
inline void CreateVariables(Scope& scope,
const std::vector<std::string>& var_names) {
for (const auto& name : var_names) {
scope.NewVar(name);
scope.Var(name);
}
}
......@@ -136,7 +136,7 @@ void DynamicRecurrentOp::WriteStepInputs() const {
auto& step_scope = cache_.GetScope(step);
Variable* var = step_scope.FindVar(item.first);
if (var == nullptr) {
var = step_scope.NewVar(item.first);
var = step_scope.Var(item.first);
}
var->GetMutable<LoDTensor>()->ShareDataWith<value_type>(tensor);
}
......
......@@ -36,7 +36,7 @@ void OpDescNewVar(const std::string& param_name,
// create a LoD tensor in scope with specific dims
LoDTensor* CreateVar(Scope& scope, std::string name, framework::DDim dims,
const platform::Place& place) {
auto* var = scope.NewVar(name);
auto* var = scope.Var(name);
auto* tensor = var->GetMutable<LoDTensor>();
tensor->Resize(dims);
tensor->mutable_data<float>(place);
......@@ -85,7 +85,7 @@ class DynamicRecurrentOpTestHelper : public ::testing::Test {
void CreateGlobalVariables() {
platform::CPUPlace place;
scope.NewVar("step_scopes");
scope.Var("step_scopes");
CreateVar(scope, "boot_mem", framework::make_ddim({10, 20}), place);
CreateVar(scope, "out0", framework::make_ddim({10, 20}), place);
auto* in0 = CreateVar(scope, "in0", framework::make_ddim({10, 8}), place);
......
......@@ -70,14 +70,14 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope,
// the weight are located in parent scope
for (auto& var_name : input.second) {
if (!step_scope.FindVar(var_name)) {
step_scope.NewVar(var_name)->GetMutable<LoDTensor>();
step_scope.Var(var_name)->GetMutable<LoDTensor>();
}
}
}
// create stepnet's outputs
for (const auto& output : (*stepnet_)->Outputs()) {
for (auto& var_name : output.second) {
step_scope.NewVar(var_name);
step_scope.Var(var_name);
}
}
step_scopes->emplace_back(&step_scope);
......@@ -87,7 +87,7 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope,
void RecurrentAlgorithm::InitMemories(Scope* step_scope) const {
for (auto& attr : arg_->memories) {
auto* pre_mem = step_scope->NewVar(attr.pre_var)->GetMutable<LoDTensor>();
auto* pre_mem = step_scope->Var(attr.pre_var)->GetMutable<LoDTensor>();
PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr,
"memory [%s]'s boot variable [%s] not exists", attr.var,
attr.boot_var);
......@@ -167,9 +167,9 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients(
"memory variable [%s] does not exists", attr.var);
PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr,
"boot variable [%s] does not exists", attr.boot_var);
auto* mem_grad = step_scope->NewVar(attr.var)->GetMutable<LoDTensor>();
auto* mem_grad = step_scope->Var(attr.var)->GetMutable<LoDTensor>();
auto* boot_mem_grad =
step_scope->NewVar(attr.boot_var)->GetMutable<LoDTensor>();
step_scope->Var(attr.boot_var)->GetMutable<LoDTensor>();
boot_mem_grad->Resize(mem_grad->dims());
boot_mem_grad->ShareDataWith<float>(*mem_grad);
}
......
......@@ -40,7 +40,7 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
f::DDim step_dims = slice_ddim(dims, 1, dims.size());
for (size_t j = 0; j < seq_len; j++) {
Tensor* step_input =
step_scopes[j]->NewVar(inlinks[i])->GetMutable<Tensor>();
step_scopes[j]->Var(inlinks[i])->GetMutable<Tensor>();
// The input of operators of each step is Tensor here.
// Maybe need to modify Slice function.
*step_input = input->Slice<float>(j, j + 1);
......
if(WITH_PYTHON)
cc_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc
DEPS pybind python backward proto_desc tensor_array paddle_memory
DEPS pybind python backward proto_desc tensor_array paddle_memory executor
${GLOB_OP_LIB})
endif(WITH_PYTHON)
......@@ -120,7 +120,19 @@ void BindProgramDesc(py::module &m) {
.def("append_backward",
[](ProgramDescBind &program_desc, const VarDescBind &target,
const std::unordered_set<std::string> &no_grad_vars) {
AppendBackward(program_desc, target, no_grad_vars);
ParamGradInfoMap param_grad_map =
AppendBackward(program_desc, target, no_grad_vars);
std::unordered_map<
std::string, std::tuple<std::string /* grad_var_name */,
int /* block_idx */, int /* op_idx */>>
retv;
for (auto it = param_grad_map.begin(); it != param_grad_map.end();
++it) {
const auto &grad_info = it->second;
retv[it->first] = std::make_tuple(
grad_info.name_, grad_info.block_idx_, grad_info.op_idx_);
}
return retv;
})
.def("block", &ProgramDescBind::Block, py::return_value_policy::reference)
.def("num_blocks", &ProgramDescBind::Size)
......@@ -145,16 +157,16 @@ void BindBlockDesc(py::module &m) {
py::return_value_policy::reference)
.def("prepend_op", &BlockDescBind::PrependOp,
py::return_value_policy::reference)
.def("new_var",
.def("var",
[](BlockDescBind &self, py::bytes byte_name) {
std::string name = byte_name;
return self.NewVar(name);
return self.Var(name);
},
py::return_value_policy::reference)
.def("var",
.def("find_var",
[](BlockDescBind &self, py::bytes byte_name) {
std::string name = byte_name;
return self.Var(name);
return self.FindVar(name);
},
py::return_value_policy::reference)
.def("all_vars", &BlockDescBind::AllVars,
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/pybind/protobuf.h"
#include "paddle/framework/backward.h"
#include "paddle/framework/executor.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/tensor_array.h"
#include "paddle/operators/cond_op.h"
......@@ -164,9 +165,9 @@ All parameter, weight, gradient are variables in Paddle.
py::return_value_policy::reference);
py::class_<Scope>(m, "Scope", "")
.def("new_var",
.def("var",
[](Scope &self, const std::string &name) -> Variable * {
return self.NewVar(name);
return self.Var(name);
},
py::return_value_policy::reference)
.def("find_var", &Scope::FindVar, py::return_value_policy::reference)
......@@ -391,6 +392,14 @@ All parameter, weight, gradient are variables in Paddle.
self.set_falsenet(net.Clone());
});
py::class_<framework::Executor>(m, "Executor")
.def(py::init<std::vector<platform::Place> &>())
.def("run",
[](Executor &self, const ProgramDesc &program_desc, int block_id) {
framework::Scope &global_scope = GetGlobalScope();
self.Run(program_desc, &global_scope, block_id);
});
m.def("unique_integer", UniqueIntegerGenerator);
m.def("is_compile_gpu", IsCompileGPU);
......
......@@ -39,15 +39,18 @@ add_test(NAME test_CompareTwoNets
################ test_CompareMKLDNNandCPU ######################
if(WITH_MKLDNN)
add_unittest_without_exec(test_CompareMKLDNNandCPU
test_CompareTwoNets.cpp)
add_test(NAME test_CompareMKLDNNandCPU
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${CMAKE_CURRENT_BINARY_DIR}/test_CompareMKLDNNandCPU
--config_file_a=trainer/tests/sample_trainer_config_simple_net.conf --use_mkldnn_a=True
--config_file_b=trainer/tests/sample_trainer_config_simple_net.conf --use_mkldnn_b=False
--use_gpu=False
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
macro(gen_command VAR_NAME CONFIG_FILE)
set(${VAR_NAME} "${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh" "-d" "${PADDLE_SOURCE_DIR}/python/"
"${CMAKE_CURRENT_BINARY_DIR}/test_CompareMKLDNNandCPU --use_gpu=False"
"--config_file_a=trainer/tests/${CONFIG_FILE} --use_mkldnn_a=True"
"--config_file_b=trainer/tests/${CONFIG_FILE} --use_mkldnn_b=False"
"WORKING_DIRECTORY" "${PADDLE_SOURCE_DIR}/paddle/")
endmacro()
add_unittest_without_exec(test_CompareMKLDNNandCPU test_CompareTwoNets.cpp)
gen_command(compare_simple_net "sample_trainer_config_simple_net.conf")
gen_command(compare_branch_net "sample_trainer_config_branch_net.conf")
add_test(NAME test_CompareMKLDNNandCPU_simple_net COMMAND ${compare_simple_net})
add_test(NAME test_CompareMKLDNNandCPU_branch_net COMMAND ${compare_branch_net})
endif()
############### test_CompareTwoOpts ###################
......
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
################################### Data Configuration ###################################
TrainData(ProtoData(files = "trainer/tests/mnist.list"))
################################### Algorithm Configuration ###################################
settings(batch_size = 256,
learning_method = MomentumOptimizer(momentum=0.5, sparse=False))
################################### Network Configuration ###################################
data = data_layer(name ="input", size=784)
tmp = img_conv_layer(input=data,
num_channels=1,
filter_size=3,
num_filters=32,
padding=1,
shared_biases=True,
act=ReluActivation())
a1 = img_conv_layer(input=tmp,
filter_size=1,
num_filters=32,
padding=0,
shared_biases=True,
act=ReluActivation())
a2 = img_conv_layer(input=tmp,
filter_size=3,
num_filters=32,
padding=1,
shared_biases=True,
act=ReluActivation())
tmp = concat_layer(input=[a1, a2])
tmp = img_pool_layer(input=tmp,
num_channels=64,
pool_size=3,
stride=2,
padding=1,
pool_type=AvgPooling())
b1 = img_conv_layer(input=tmp,
filter_size=3,
num_filters=64,
padding=1,
shared_biases=True,
act=ReluActivation())
b1 = img_pool_layer(input=b1,
pool_size=3,
stride=1,
padding=1,
pool_type=MaxPooling())
b2 = img_conv_layer(input=tmp,
filter_size=5,
num_filters=64,
padding=2,
shared_biases=True,
act=ReluActivation())
b2 = img_pool_layer(input=b2,
pool_size=5,
stride=1,
padding=2,
pool_type=MaxPooling())
tmp = addto_layer(input=[b1, b2],
act=ReluActivation(),
bias_attr=False)
tmp = img_pool_layer(input=tmp,
pool_size=3,
stride=2,
padding=1,
pool_type=MaxPooling())
tmp = fc_layer(input=tmp, size=64,
bias_attr=False,
act=TanhActivation())
output = fc_layer(input=tmp, size=10,
bias_attr=True,
act=SoftmaxActivation())
lbl = data_layer(name ="label", size=10)
cost = classification_cost(input=output, label=lbl)
outputs(cost)
......@@ -5,7 +5,7 @@ Default scope function.
thread-local stack of Scope. Top of that stack is current scope, the bottom
of that stack is all scopes' parent.
Invoking `new_var/find_var` can `new/find` variable in current scope.
Invoking `var/find_var` can `new/find` variable in current scope.
Invoking `enter_local_scope/leave_local_scope` can create or destroy local
scope.
......@@ -19,7 +19,7 @@ import threading
__tl_scope__ = threading.local()
__all__ = [
'get_cur_scope', 'enter_local_scope', 'leave_local_scope', 'new_var',
'get_cur_scope', 'enter_local_scope', 'leave_local_scope', 'var',
'find_var', 'scoped_function'
]
......@@ -54,11 +54,11 @@ def leave_local_scope():
get_cur_scope().drop_kids()
def new_var(name):
def var(name):
"""
create variable in current scope.
"""
return get_cur_scope().new_var(name)
return get_cur_scope().var(name)
def find_var(name):
......
......@@ -20,11 +20,11 @@ class Variable(object):
if name is None:
name = Variable._unique_var_name_()
try:
is_new_var = False
self.desc = self.block.desc.find_var(name)
if self.desc is None:
self.desc = self.block.desc.var(name)
is_new_var = False
except core.EnforceNotMet:
self.desc = self.block.desc.new_var(name)
is_new_var = True
if is_new_var:
......
......@@ -14,7 +14,7 @@ def create_op(scope, op_type, inputs, outputs, attrs):
kwargs = dict()
def __create_var__(name, var_name):
scope.new_var(var_name)
scope.var(var_name)
kwargs[name].append(var_name)
for in_name, in_dup in Operator.get_op_inputs(op_type):
......@@ -71,7 +71,7 @@ def set_input(scope, op, inputs, place):
def set_output_grad(scope, op, outputs, place):
def __set_tensor__(name):
out_tensor = scope.find_var(name).get_tensor()
grad_tensor = scope.new_var(grad_var_name(name)).get_tensor()
grad_tensor = scope.var(grad_var_name(name)).get_tensor()
out_dtype = out_tensor.dtype()
if out_dtype == core.DataType.FP64:
data = np.ones(out_tensor.shape(), dtype=np.float64)
......@@ -169,10 +169,10 @@ def get_numeric_gradient(scope,
def get_backward_op(scope, op, no_grad_set):
backward_op = core.Operator.backward(op, no_grad_set)
for input in backward_op.input_vars():
var = scope.new_var(input)
var = scope.var(input)
var.get_tensor()
for output in backward_op.output_vars():
var = scope.new_var(output)
var = scope.var(output)
var.get_tensor()
return backward_op
......
......@@ -39,7 +39,7 @@ class PySimpleCondTest(unittest.TestCase):
def create_tensor(scope, name, shape, np_data):
tensor = scope.new_var(name).get_tensor()
tensor = scope.var(name).get_tensor()
tensor.set_dims(shape)
tensor.set(np_data, core.CPUPlace())
return tensor
......@@ -74,9 +74,9 @@ class TestCondOp(unittest.TestCase):
create_tensor(self.scope, "X", [10, 1], x_np_data)
cond_np_data = self.py_cond.cond.astype("int32")
create_tensor(self.scope, "cond", [10, 1], cond_np_data)
self.scope.new_var("SubScopes")
self.scope.new_var("IndexTensors")
self.scope.new_var("Out")
self.scope.var("SubScopes")
self.scope.var("IndexTensors")
self.scope.var("Out")
def create_cond_op(self):
self.condop = CondOp(
......
......@@ -10,7 +10,7 @@ class TestDefaultScopeFuncs(unittest.TestCase):
self.assertIsNone(find_var("test"))
def test_create_var_get_var(self):
var_a = new_var("var_a")
var_a = var("var_a")
self.assertIsNotNone(var_a)
self.assertIsNotNone(get_cur_scope().find_var('var_a'))
enter_local_scope()
......@@ -19,7 +19,7 @@ class TestDefaultScopeFuncs(unittest.TestCase):
def test_var_get_int(self):
def __new_scope__():
i = new_var("var_i")
i = var("var_i")
self.assertFalse(i.is_int())
i.set_int(10)
self.assertTrue(i.is_int())
......
......@@ -6,7 +6,7 @@ import numpy as np
def create_tensor(scope, name, shape, np_data):
tensor = scope.new_var(name).get_tensor()
tensor = scope.var(name).get_tensor()
tensor.set_dims(shape)
tensor.set(np_data, core.CPUPlace())
return tensor
......@@ -72,8 +72,8 @@ class DynamicRecurrentOpTest(unittest.TestCase):
create_tensor(self.scope, "U", [self.input_dim, self.input_dim], U)
create_tensor(self.scope, "h_boot", [self.num_sents, self.input_dim],
h_boot)
self.scope.new_var("step_scopes")
self.scope.new_var("h@mem")
self.scope.var("step_scopes")
self.scope.var("h@mem")
def create_rnn_op(self):
# create RNNOp
......
......@@ -14,7 +14,7 @@ class TestGaussianRandomOp(unittest.TestCase):
def gaussian_random_test(self, place):
scope = core.Scope()
scope.new_var('Out').get_tensor()
scope.var('Out').get_tensor()
op = Operator(
"gaussian_random",
......
......@@ -13,14 +13,14 @@ class TestInferShape(unittest.TestCase):
shape = [10, 20]
# prepare input/output
x1 = block.new_var("x1")
x1 = block.var("x1")
x1.set_type(core.VarDesc.VarType.LOD_TENSOR)
x1.set_shape(shape)
x2 = block.new_var("x2")
x2 = block.var("x2")
x2.set_type(core.VarDesc.VarType.LOD_TENSOR)
x2.set_shape(shape)
out = block.new_var("out")
out = block.var("out")
out.set_type(core.VarDesc.VarType.LOD_TENSOR)
# prepare the operator
......@@ -42,14 +42,14 @@ class TestInferShape(unittest.TestCase):
y_shape = [20, 30]
# prepare input/output
x1 = block.new_var("x")
x1 = block.var("x")
x1.set_type(core.VarDesc.VarType.LOD_TENSOR)
x1.set_shape(x_shape)
x2 = block.new_var("y")
x2 = block.var("y")
x2.set_type(core.VarDesc.VarType.LOD_TENSOR)
x2.set_shape(y_shape)
out = block.new_var("out")
out = block.var("out")
out.set_type(core.VarDesc.VarType.LOD_TENSOR)
# prepare the operator
......
......@@ -31,7 +31,7 @@ uniq_id = atomic_id().next
def data_layer(name, dims):
var = scope.new_var(name)
var = scope.var(name)
tensor = var.get_tensor()
tensor.set_dims(dims) # 1 is batch size holder.
return name
......@@ -67,7 +67,7 @@ def sgd_optimizer(net, param_name, learning_rate=0.005):
# should use operator and add these to the init_network
def init_param(net, param_name, dims):
scope.new_var(param_name)
scope.var(param_name)
op = Operator(
"uniform_random", Out=param_name, dims=dims, min=-0.5, max=0.5, seed=10)
op.infer_shape(scope)
......@@ -104,7 +104,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None):
sgd_optimizer(net=optimize_net, param_name=w_name, learning_rate=0.01)
pre_activation = name + ".mul.out"
scope.new_var(pre_activation)
scope.var(pre_activation)
mul_op = Operator("mul", X=input, Y=w_name, Out=pre_activation)
net.append_op(mul_op)
......@@ -115,7 +115,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None):
sgd_optimizer(
net=optimize_net, param_name=bias_name, learning_rate=0.001)
bias_out = name + ".rowwise_add.out"
scope.new_var(bias_out)
scope.var(bias_out)
rowwise_append_op = Operator(
"rowwise_add", X=pre_activation, b=bias_name, Out=bias_out)
net.append_op(rowwise_append_op)
......@@ -123,7 +123,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None):
activation_op = Operator(act, X=pre_activation, Y=name)
net.append_op(activation_op)
scope.new_var(name)
scope.var(name)
net.infer_shape(scope)
return name
......@@ -133,7 +133,7 @@ def cross_entropy_layer(net, input, label):
cross_entropy_op = Operator(
"cross_entropy", X=input, Label=label, Y=cost_name)
net.append_op(cross_entropy_op)
scope.new_var(cost_name)
scope.var(cost_name)
net.infer_shape(scope)
return cost_name
......@@ -141,10 +141,10 @@ def cross_entropy_layer(net, input, label):
def create_backward_net(forward_net):
net = core.Operator.backward(forward_net, set())
for input in net.inputs()["all"]:
var = scope.new_var(input)
var = scope.var(input)
var.get_tensor()
for output in net.outputs()["all"]:
var = scope.new_var(output)
var = scope.var(output)
var.get_tensor()
return net
......
......@@ -51,17 +51,24 @@ class TestProgram(unittest.TestCase):
sum_op_desc.set_input("Y", ["b1"])
sum_op_desc.set_output("Out", ["out2"])
target = block.new_var("out2")
target = block.var("out2")
expect_ops = [
"mul", "elementwise_add", "fill_constant", "elementwise_add_grad",
"mul_grad"
]
def grad_name(name):
return name + "@GRAD"
actual_ops = []
prog.append_backward(target, set())
param_to_grad = prog.append_backward(target, set())
for var_name in ("x1", "y1", "out1", "b1"):
self.assertEqual(param_to_grad[var_name][0], grad_name(var_name))
self.assertEqual(param_to_grad[var_name][1], 0)
for op in block.all_ops():
actual_ops.append(op.type())
print(actual_ops)
self.assertEqual(actual_ops, expect_ops)
......
......@@ -93,7 +93,7 @@ class TestVarDesc(unittest.TestCase):
def test_shape(self):
program_desc = core.ProgramDesc.__create_program_desc__()
block = program_desc.block(0)
var = block.new_var('my_var')
var = block.var('my_var')
var.set_type(core.VarDesc.VarType.SELECTED_ROWS)
src_shape = [3, 2, 10, 8]
var.set_shape(src_shape)
......@@ -104,7 +104,7 @@ class TestVarDesc(unittest.TestCase):
def test_data_type(self):
program_desc = core.ProgramDesc.__create_program_desc__()
block = program_desc.block(0)
var = block.new_var('my_var')
var = block.var('my_var')
var.set_type(core.VarDesc.VarType.LOD_TENSOR)
var.set_data_type(core.DataType.INT32)
self.assertEqual(core.DataType.INT32, var.data_type())
......@@ -117,12 +117,12 @@ class TestBlockDesc(unittest.TestCase):
self.assertIsNotNone(prog)
block = prog.block(0)
self.assertIsNotNone(block)
var1 = block.new_var("var1")
var2 = block.new_var("var2")
var3 = block.new_var("var3")
var1 = block.var("var1")
var2 = block.var("var2")
var3 = block.var("var3")
all_vars = block.all_vars()
self.assertEqual(set(all_vars), set([var1, var2, var3]))
var2_re = block.var("var2")
var2_re = block.find_var("var2")
self.assertEqual(var2_re, var2)
def test_add_op(self):
......
......@@ -66,7 +66,7 @@ class PySimpleRNNTest(unittest.TestCase):
def create_tensor(scope, name, shape, np_data):
tensor = scope.new_var(name).get_tensor()
tensor = scope.var(name).get_tensor()
tensor.set_dims(shape)
tensor.set(np_data, core.CPUPlace())
return tensor
......@@ -125,8 +125,8 @@ class RecurrentOpTest(unittest.TestCase):
h_boot_np_data = self.py_rnn.h_boot
create_tensor(self.scope, "h_boot", [self.batch_size, self.input_dim],
h_boot_np_data)
self.scope.new_var("step_scopes")
self.scope.new_var("h@mem")
self.scope.var("step_scopes")
self.scope.var("h@mem")
def create_rnn_op(self):
# create RNNOp
......
......@@ -18,7 +18,7 @@ class TestScope(unittest.TestCase):
def test_create_var_get_var(self):
paddle_c = paddle.v2.framework.core
scope = paddle_c.Scope()
var_a = scope.new_var("var_a")
var_a = scope.var("var_a")
self.assertIsNotNone(var_a)
self.assertIsNotNone(scope.find_var('var_a'))
scope2 = scope.new_scope()
......@@ -27,7 +27,7 @@ class TestScope(unittest.TestCase):
def test_var_get_int(self):
paddle_c = paddle.v2.framework.core
scope = paddle_c.Scope()
var = scope.new_var("test_int")
var = scope.var("test_int")
var.set_int(10)
self.assertTrue(var.is_int())
self.assertEqual(10, var.get_int())
......
......@@ -6,7 +6,7 @@ import numpy
class TestTensor(unittest.TestCase):
def test_int_tensor(self):
scope = core.Scope()
var = scope.new_var("test_tensor")
var = scope.var("test_tensor")
place = core.CPUPlace()
tensor = var.get_tensor()
......@@ -25,7 +25,7 @@ class TestTensor(unittest.TestCase):
def test_float_tensor(self):
scope = core.Scope()
var = scope.new_var("test_tensor")
var = scope.var("test_tensor")
place = core.CPUPlace()
tensor = var.get_tensor()
......@@ -46,7 +46,7 @@ class TestTensor(unittest.TestCase):
def test_int_lod_tensor(self):
place = core.CPUPlace()
scope = core.Scope()
var_lod = scope.new_var("test_lod_tensor")
var_lod = scope.var("test_lod_tensor")
lod_tensor = var_lod.get_tensor()
lod_tensor.set_dims([4, 4, 6])
......@@ -68,7 +68,7 @@ class TestTensor(unittest.TestCase):
def test_float_lod_tensor(self):
place = core.CPUPlace()
scope = core.Scope()
var_lod = scope.new_var("test_lod_tensor")
var_lod = scope.var("test_lod_tensor")
lod_tensor = var_lod.get_tensor()
lod_tensor.set_dims([5, 2, 3, 4])
......
......@@ -13,7 +13,7 @@ class TestTensorArray(unittest.TestCase):
# create a LoDTensor
self.scope = core.Scope()
var = self.scope.new_var("test_tensor")
var = self.scope.var("test_tensor")
self.place = core.CPUPlace()
tensor = var.get_tensor()
tensor.set_dims([self.batch_size, self.dim])
......@@ -51,7 +51,7 @@ class TestTensorArray(unittest.TestCase):
self.ta.unstack(self.tensor)
# create a tensor with shape of [1, self.dim]
var = self.scope.new_var("hell")
var = self.scope.var("hell")
tensor = var.get_tensor()
tensor.set_dims([1, self.dim])
tensor.alloc_float(self.place)
......@@ -71,7 +71,7 @@ class TestTensorArray(unittest.TestCase):
self.ta.unstack(self.tensor)
# create a tensor with shape of [1, self.dim]
var = self.scope.new_var("hell")
var = self.scope.var("hell")
tensor = var.get_tensor()
tensor.set_dims([1, self.dim])
tensor.alloc_float(self.place)
......
......@@ -14,7 +14,7 @@ class TestUniformRandomOp(unittest.TestCase):
def uniform_random_test(self, place):
scope = core.Scope()
scope.new_var('X').get_tensor()
scope.var('X').get_tensor()
op = Operator(
"uniform_random",
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册