提交 9942a304 编写于 作者: Y yuyang18

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/non_layer_api_doc

......@@ -97,7 +97,7 @@ def dist_transpile(trainer_id, args):
return train_program, fluid.default_startup_program()
else:
raise ValueError(
'TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
'PADDLE_TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
)
......
......@@ -108,10 +108,10 @@ def gen_job():
tn_container["ports"][0]["containerPort"] = spreadport
envs.append({"name": "PADDLE_JOB_NAME", "value": args.jobname})
envs.append({"name": "TRAINERS", "value": str(args.trainers)})
envs.append({"name": "PADDLE_TRAINERS", "value": str(args.trainers)})
envs.append({"name": "PSERVERS", "value": str(args.pservers)})
envs.append({"name": "ENTRY", "value": args.entry})
envs.append({"name": "PADDLE_INIT_PORT", "value": str(args.port)})
envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)})
envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)})
# NOTE: these directories below are cluster specific, please modify
# this settings before you run on your own cluster.
......@@ -167,16 +167,22 @@ def gen_job():
tn_container["volumeMounts"] = volumeMounts
ps_container["env"] = envs
ps_container["env"].append({"name": "TRAINING_ROLE", "value": "PSERVER"})
ps_container["env"].append({
"name": "PADDLE_TRAINING_ROLE",
"value": "PSERVER"
})
tn_container["env"] = envs
if args.disttype == "pserver":
tn_container["env"].append({
"name": "TRAINING_ROLE",
"name": "PADDLE_TRAINING_ROLE",
"value": "TRAINER"
})
elif args.disttype == "nccl2" or args.disttype == "local":
# NCCL2 have no training role, set to plain WORKER
tn_container["env"].append({"name": "TRAINING_ROLE", "value": "WORKER"})
tn_container["env"].append({
"name": "PADDLE_TRAINING_ROLE",
"value": "WORKER"
})
os.mkdir(args.jobname)
if args.disttype == "pserver":
......
......@@ -45,7 +45,8 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
ELSE()
MESSAGE(FATAL_ERROR "Should enable MKLML when build MKLDNN")
ENDIF()
SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-unused-result")
SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result")
SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value")
SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}")
SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}")
ExternalProject_Add(
......
......@@ -168,13 +168,13 @@ cd /paddle/python/paddle/fluid/tests/book
第二步,启动Parameter Server:
```bash
PADDLE_INIT_PORT=6174 PADDLE_INIT_PSERVERS=192.168.1.2 TRAINERS=2 POD_IP=192.168.1.2 PADDLE_INIT_TRAINER_ID=1 TRAINING_ROLE=PSERVER python test_fit_a_line.py
PADDLE_PSERVER_PORT=6174 PADDLE_PSERVER_IPS=192.168.1.2 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=192.168.1.2 PADDLE_TRAINER_ID=1 PADDLE_TRAINING_ROLE=PSERVER python test_fit_a_line.py
```
执行命令后请等待出现提示: ```Server listening on 192.168.1.2:6174 ```, 表示Paramter Server已经正常启动。
第三步,启动Trainer:
```bash
PADDLE_INIT_PORT=6174 PADDLE_INIT_PSERVERS=192.168.1.3 TRAINERS=2 POD_IP=192.168.1.3 PADDLE_INIT_TRAINER_ID=1 TRAINING_ROLE=TRAINER python test_fit_a_line.py
PADDLE_PSERVER_PORT=6174 PADDLE_PSERVER_IPS=192.168.1.3 PADDLE_TRAINERS=2 PADDLE_CURRENT_IPP=192.168.1.3 PADDLE_TRAINER_ID=1 PADDLE_TRAINING_ROLE=TRAINER python test_fit_a_line.py
```
由于我们定义的Trainer的数量是2个,因此需要在另外一个计算节点上再启动一个Trainer。
......
......@@ -114,8 +114,8 @@ def gen_train_list(file_pattern, trainers, trainer_id):
ret_list.append(f)
return ret_list
trainers = int(os.getenv("TRAINERS"))
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
trainers = int(os.getenv("PADDLE_TRAINERS"))
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
data_file = fluid.layers.io.open_files(
filenames=gen_train_list("./mnist-[0-9]*.recordio", 2, 0),
thread_num=1,
......
......@@ -14,4 +14,3 @@
#
add_subdirectory(inference)
add_subdirectory(tape)
# Dynamic Graph on Fluid
PaddlePaddle Fluid is targeting the autodiff without tape, which, however, is very
challenging and we are still way from there. DyNet and PyTorch provide a good design
idea, the *tape*, that significantly eases the challenge. Also, DyNet provides
a C++ API that is as convenient as Python but with higher efficiency and could
conveniently integrate with industrial/production systems. This package, `tape`,
combines the good of
1. tape from PyTorch and DyNet
2. C++ API and core from DyNet
3. rich set of operators from PaddlePaddle
## Overview
We can implement Dynet-like Tape(See this [survey](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/survey/dynamic_graph.md))
by wrapping Paddle Fluid's `Operator` and `Variable`.
The user API is straight forward since
1. it is imperative. And it uses host language's control flow logic.
1. it avoids extra concepts such as `Scope` and `Executor`.
All of these benefits come at the cost of just adding one line `reset_global_tape`
at every iteration.
## Code Structure
In short, the `Tape` contains a vector of `OpHandle`s. And an `OpHandle` contains its
`type`, the pointers to the `Variable`s, and necessary attributes.
```c++
class Variable {
public:
VriableHandle Grad(); // returns its gradient variable
private:
framework::VarDesc desc_; // compile time infershape, necessary for lazy execution
framework::Variable var_; // run time variable, holds data memory
};
using VariableHandle = shared_ptr<Variable>;
struct OpHandle {
string type_;
map<string, vector<VariableHandle>> inputs_;
map<string, vector<VariableHandle>> outputs_;
AttributeMap attrs_;
};
class Tape {
public:
void AddOp(OpHandle); // add op
void Forward(); // execute the tape_
void Backward(); // execute the backward of the tape_
private:
vector<OpHandle> tape_;
};
```
We uses `Function` to indicate layers. It takes care of parameter
initialization and `AddOp` to the Tape when it is called.
```c++
class Linear {
public:
Linear(int in_dim, int out_dim, const std::string &act)
: w_(new Variable("LinearWeight")),
b_(new Variable("LinearBias")),
act_(act) {
Tape init_tape;
std::string initializer = "fill_constant";
framework::AttributeMap attrs;
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{in_dim, out_dim};
attrs["value"] = 1.0f;
init_tape.AddOp(initializer, {}, {{"Out", {w_}}}, attrs);
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{out_dim};
attrs["value"] = 1.0f;
init_tape.AddOp(initializer, {}, {{"Out", {b_}}}, attrs);
init_tape.Forward();
}
VariableHandle operator()(VariableHandle input) {
VariableHandle pre_bias(new Variable("linear"));
get_global_tape().AddOp("mul",
{{"X", {input}}, {"Y", {w_}}},
{{"Out", {pre_bias}}},
{{"x_num_col_dims", 1}, {"y_num_col_dims", 1}});
VariableHandle pre_act(new Variable("linear"));
get_global_tape().AddOp("elementwise_add",
{{"X", {pre_bias}}, {"Y", {b_}}},
{{"Out", {pre_act}}},
{{"axis", 1}});
VariableHandle post_act(new Variable("linear"));
get_global_tape().AddOp(act_,
{{"X", {pre_act}}},
{{"Out", {post_act}}},
{});
return post_act;
}
std::vector<VariableHandle> Params() { return {w_, b_}; }
private:
VariableHandle w_;
VariableHandle b_;
std::string act_;
};
```
## User API
```c++
// Model function
paddle::tape::Linear linear1(3, 3, "relu"); // init weight and bias
paddle::tape::Linear linear2(3, 3, "relu"); // init weight and bias
paddle::tape::Mean mean;
// Optimizer
paddle::tape::SGD sgd(0.001);
// Data Feeder
paddle::tape::Fill data_feeder(...);
VariableHandle input(new paddle::tape::Variable("input"));
VariableHandle label(new paddle::tape::Variable("label"));
for (int i = 0; i < 2; ++i) {
reset_global_tape();
data_feeder(input, label);
auto loss = softmax(linear2(linear1(input)), label); // compile time InferShape & InferVarType
LOG(INFO) << loss.value(); // Run forward up to loss
// Run backward, store gradient of w at w->Grad()
get_global_tape.Backward(loss);
// Update w
sgd(linear1.Params());
sgd(linear2.Params());
}
```
<details>
<summary></summary>
digraph G {
subgraph cluster_0 {
node [shape=record,style=filled];
style=filled;
color=lightgrey;
linear1 [label="{type: mul | {input | {<before_mul1>X: before_mul1 |<weight1> Y: weight1}} | {output |<before_bias1> Out: before_bias1}}"];
elementwise_add1 [label="{type: elementwise_add | {input | {<before_bias1>X: before_bias1 |<bias1> Y: bias1}} | {output |<before_act1> Out: before_act1}}"];
relu1 [label="{type: relu | {input | {<before_act1>X: before_act1 }} | {output |<after_act1> Out: after_act1}}"];
linear1 -> elementwise_add1->relu1;
label = "forward tape";
}
linear1:before_mul1->before_mul1
linear1:weight1->weight1
linear1:before_bias1->before_bias1
elementwise_add1:bias1->bias1
elementwise_add1:before_bias1->before_bias1
elementwise_add1:before_act1->before_act1
relu1:before_act1->before_act1
relu1:after_act1->after_act1
subgraph cluster_1 {
node [shape=record,style=filled];
style=filled;
color=lightgrey;
linear1_grad [label="{type: mul_grad | {input | {<before_mul1>X: before_mul1 |<weight1> Y: weight1|<before_bias1_grad> Out_grad: before_bias1_grad}} | {output |{<before_mul1_grad>X_grad: before_mul1_grad |<weight1_grad> Y_grad: weight1_grad}}}"];
elementwise_add1_grad [label="{type: elementwise_add_grad | {input | <before_act1_grad> Out_grad: before_act1_grad} | {output |{<before_bias1_grad>X_grad: before_bias1_grad |<bias1_grad> Y_grad: bias1_grad}}}"];
relu1_grad [label="{type: relu_grad | {input |<after_act1_grad> Out_grad: after_act1_grad} | {ouput | {<before_act1_grad>X_grad: before_act1_grad }}}"];
linear1_grad -> elementwise_add1_grad ->relu1_grad [dir=back];
label = "backward tape";
}
relu1_grad:after_act1_grad->after_act1_grad
relu1_grad:before_act1_grad->before_act1_grad
elementwise_add1_grad:before_act1_grad->before_act1_grad
elementwise_add1_grad:before_bias1_grad->before_bias1_grad
elementwise_add1_grad:bias1_grad->bias1_grad
linear1_grad:before_mul1->before_mul1
linear1_grad:weight1->weight1
linear1_grad:before_bias1_grad->before_bias1_grad
linear1_grad:before_mul1_grad->before_mul1_grad
linear1_grad:weight1_grad->weight1_grad
subgraph cluster_2 {
node [shape=record];
label = "Linear1";
weight1
bias1
}
weight1 -> weight1_grad [ label="Grad()", style="dashed" ];
bias1 -> bias1_grad [ label="Grad()", style="dashed"];
}
</details>
![Image](https://github.com/tonyyang-svail/Paddle/blob/cpp_tap/paddle/contrib/tape/computation_graph.png)
## Code Reuse
We want to stay close to Paddle Fluid as much as possible.
### Reuse All Operators
As all Ops are registered at `OpInfoMap`, the effort of adding a new `Function`
is about 10 lines of code, similar to expose an operator to Python.
### Reuse Compile Time InferShape and InferVarType
Note that all the symbolic information is stored at `tape::Varaible::desc_`, instead
of `ProgramDesc.block.vars`, we create a temporary `BlockDesc` to do `InferShape` and
`InferVarType` every time we `AddOp` to the tape.
### Reuse Operator::Run
We use smart pointer, instead of `Scope`, to manage memory. So we create a temporary
`Scope` for every `Operator::Run()`.
## Possible Feature
### Release Memory on Backward
We can release memory aggressively. During backward, we can delete the OpHandle once
we have finished its backward. Since all the variable is managed by smart pointer, the
memory is automatically released when its `ref_count` goes to 0.
### Kernel Fusion
As a symbolic representation of the Tape is constructed first before the actual
execution, it would be possible to perform graph optimization. One use case is kernel
fusion.
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/contrib/tape/tape.h"
#include "paddle/contrib/tape/variable.h"
#include "paddle/fluid/framework/type_defs.h"
namespace paddle {
namespace tape {
class Function {};
class Fill {
public:
Fill(const std::string &initializer, const framework::AttributeMap &attrs)
: initializer_(initializer), attrs_(attrs) {}
void operator()(VariableHandle var) {
get_global_tape().AddOp(initializer_, {}, {{"Out", {var}}}, attrs_);
}
private:
const std::string initializer_;
const framework::AttributeMap attrs_;
};
class Mean {
public:
VariableHandle operator()(VariableHandle var) {
VariableHandle out(new Variable("mean"));
get_global_tape().AddOp("mean", {{"X", {var}}}, {{"Out", {out}}}, {});
return out;
}
};
class Linear {
public:
Linear(int in_dim, int out_dim, const std::string &act)
: w_(new Variable("LinearWeight")),
b_(new Variable("LinearBias")),
act_(act) {
Tape init_tape;
std::string initializer = "fill_constant";
framework::AttributeMap attrs;
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{in_dim, out_dim};
attrs["value"] = 1.0f;
init_tape.AddOp(initializer, {}, {{"Out", {w_}}}, attrs);
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{out_dim};
attrs["value"] = 1.0f;
init_tape.AddOp(initializer, {}, {{"Out", {b_}}}, attrs);
init_tape.Forward();
}
VariableHandle operator()(VariableHandle input) {
VariableHandle pre_bias(new Variable("linear"));
get_global_tape().AddOp("mul",
{{"X", {input}}, {"Y", {w_}}},
{{"Out", {pre_bias}}},
{{"x_num_col_dims", 1}, {"y_num_col_dims", 1}});
VariableHandle pre_act(new Variable("linear"));
get_global_tape().AddOp("elementwise_add",
{{"X", {pre_bias}}, {"Y", {b_}}},
{{"Out", {pre_act}}},
{{"axis", 1}});
VariableHandle post_act(new Variable("linear"));
get_global_tape().AddOp(
act_, {{"X", {pre_act}}}, {{"Out", {post_act}}}, {});
return post_act;
}
std::vector<VariableHandle> Params() { return {w_, b_}; }
private:
VariableHandle w_;
VariableHandle b_;
std::string act_;
};
class SGD {
public:
SGD(float learning_rate) : learning_rate_(new Variable("sgd")) {
Tape init_tape;
std::string initializer = "fill_constant";
framework::AttributeMap attrs;
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{1};
attrs["value"] = learning_rate;
init_tape.AddOp(initializer, {}, {{"Out", {learning_rate_}}}, attrs);
init_tape.Forward();
}
void operator()(VariableHandle input) {
PADDLE_ENFORCE(get_global_tape().HasBeenBackwarded(),
"optimization must happen after the backward");
Tape temp_tape;
temp_tape.AddOp("sgd",
{{"Param", {input}},
{"LearningRate", {learning_rate_}},
{"Grad", {input->Grad()}}},
{{"ParamOut", {input}}},
{});
temp_tape.Forward();
}
private:
VariableHandle learning_rate_;
};
}
}
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/contrib/tape/tape.h"
#include <list>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/dim.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/pybind/pybind.h"
namespace paddle {
namespace tape {
// borrowed from
// https://stackoverflow.com/questions/874134/find-if-string-ends-with-another-string-in-c
inline bool ends_with(std::string const &value, std::string const &ending) {
if (ending.size() > value.size()) return false;
return std::equal(ending.rbegin(), ending.rend(), value.rbegin());
}
std::ostream &operator<<(std::ostream &os, const framework::VarDesc &var_desc) {
os << var_desc.Name();
os << "[" << var_desc.GetType() << "]";
os << "[" << var_desc.GetDataType() << "]";
os << "{";
for (auto &i : var_desc.GetShape()) {
os << i << ",";
}
os << "}";
return os;
}
std::string to_string(const std::string &type,
const VariableHandleMap &in_vars,
const VariableHandleMap &out_vars,
const framework::AttributeMap &attrs) {
std::stringstream ss;
ss << type << " ";
for (auto &param_name : in_vars) {
for (auto &var : param_name.second) {
ss << param_name.first << ":(" << var->Desc() << ") ";
}
}
for (auto &param_name : out_vars) {
for (auto &var : param_name.second) {
ss << param_name.first << ":(" << var->Desc() << ") ";
}
}
return ss.str();
}
framework::OpDesc CreateOpDesc(const std::string &type,
const VariableHandleMap &in_vars,
const VariableHandleMap &out_vars,
const framework::AttributeMap &attrs) {
framework::VariableNameMap inputs;
for (auto &param_name : in_vars) {
for (auto &var : param_name.second) {
inputs[param_name.first].emplace_back(var->Name());
}
}
framework::VariableNameMap outputs;
for (auto &param_name : out_vars) {
for (auto &var : param_name.second) {
outputs[param_name.first].emplace_back(var->Name());
}
}
return framework::OpDesc(type, inputs, outputs, attrs);
}
void InferShapeAndVarType(const std::string &type,
const VariableHandleMap &in_vars,
VariableHandleMap *out_vars,
const framework::AttributeMap &attrs) {
framework::OpDesc op_desc = CreateOpDesc(type, in_vars, *out_vars, attrs);
// Create a temporary block for compile-time
framework::ProgramDesc program_desc;
framework::BlockDesc *block_desc = program_desc.MutableBlock(0);
PADDLE_ENFORCE(block_desc);
for (auto &param_name : in_vars) {
for (auto &var : param_name.second) {
*block_desc->Var(var->Name())->Proto() = *var->MutableDesc()->Proto();
}
}
for (auto &param_name : *out_vars) {
for (auto &var : param_name.second) {
*block_desc->Var(var->Name())->Proto() = *var->MutableDesc()->Proto();
}
}
LOG(INFO) << "- " << to_string(type, in_vars, *out_vars, attrs);
op_desc.InferShape(*block_desc);
op_desc.InferVarType(block_desc);
for (auto &param_name : *out_vars) {
for (auto &var : param_name.second) {
*var->MutableDesc()->Proto() = *block_desc->Var(var->Name())->Proto();
}
}
LOG(INFO) << "+ " << to_string(type, in_vars, *out_vars, attrs);
}
void Tape::AddOp(const std::string &type,
const VariableHandleMap &in_vars,
VariableHandleMap out_vars,
const framework::AttributeMap &attrs) {
InferShapeAndVarType(type, in_vars, &out_vars, attrs);
tape_.emplace_back(type, in_vars, out_vars, attrs);
}
// Temporary Scope for Operator::Run()
class ScopeWrapper : public framework::Scope {
public:
ScopeWrapper(const VariableHandleMap &in_vars,
const VariableHandleMap &out_vars) {
for (auto &v : in_vars) {
for (auto &vv : v.second) {
if (!vars_.count(vv->Name())) {
vars_[vv->Name()].reset(vv->Var());
}
}
}
for (auto &v : out_vars) {
for (auto &vv : v.second) {
if (!vars_.count(vv->Name())) {
vars_[vv->Name()].reset(vv->Var());
}
}
}
}
~ScopeWrapper() {
for (auto &pair : vars_) {
pair.second.release();
}
}
};
void Tape::Forward() {
LOG(INFO) << "Starting forward -------------------------";
PADDLE_ENFORCE(!has_been_backwarded_);
while (current_position_ < tape_.size()) {
OpHandle &op = tape_[current_position_];
// Create Output Tensor, this is only necessary for OpWithKernel
for (auto &param2var : op.outputs_) {
for (auto &var : param2var.second) {
var->InitializeVariable();
}
}
framework::OpDesc op_desc =
CreateOpDesc(op.type_, op.inputs_, op.outputs_, op.attrs_);
ScopeWrapper scope(op.inputs_, op.outputs_);
framework::OpRegistry::CreateOp(op_desc)->Run(scope, platform::CPUPlace());
current_position_++;
}
LOG(INFO) << "Finishing forward -------------------------";
}
void Tape::Backward(VariableHandle target) {
PADDLE_ENFORCE(!has_been_backwarded_);
Forward();
// TODO(tonyyang-svail): check output of last op is target
backward_tape_.reset(new Tape());
framework::AttributeMap attrs;
// FIXME(tonyyang-svail): Need to infer_data_type
attrs["dtype"] = framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{1};
attrs["value"] = 1.0f;
backward_tape_->AddOp(
"fill_constant", {}, {{"Out", {target->Grad()}}}, attrs);
for (auto it = tape_.rbegin(); it != tape_.rend(); ++it) {
framework::OpDesc op_desc =
CreateOpDesc(it->type_, it->inputs_, it->outputs_, it->attrs_);
std::unordered_map<std::string, std::string> grad_to_var;
std::vector<std::unique_ptr<framework::OpDesc>> grad_op_descs =
framework::OpInfoMap::Instance()
.Get(op_desc.Type())
.GradOpMaker()(op_desc, {}, &grad_to_var, {});
for (auto &op_desc : grad_op_descs) {
std::unordered_map<std::string, VariableHandle> name2var;
for (auto &param2vars : it->inputs_) {
for (auto &a : param2vars.second) {
name2var[a->Name()] = a;
}
}
for (auto &param2vars : it->outputs_) {
for (auto &a : param2vars.second) {
name2var[a->Name()] = a;
}
}
VariableHandleMap in_vars;
VariableHandleMap out_vars;
std::map<const framework::VariableNameMap *, VariableHandleMap *>
loop_over{{&op_desc->Inputs(), &in_vars},
{&op_desc->Outputs(), &out_vars}};
for (auto &each : loop_over) {
auto &vmp = *each.first;
auto &vhm = *each.second;
for (auto &p2a : vmp) {
for (auto &argu : p2a.second) {
if (name2var.count(argu)) {
vhm[p2a.first].push_back(name2var[argu]);
} else {
PADDLE_ENFORCE(ends_with(argu, framework::kGradVarSuffix),
argu.c_str());
std::string name = argu.substr(
0, argu.size() - std::strlen(framework::kGradVarSuffix));
PADDLE_ENFORCE(name2var.count(name), name.c_str());
vhm[p2a.first].push_back(name2var[name]->Grad());
}
}
}
}
backward_tape_->AddOp(
op_desc->Type(), in_vars, out_vars, op_desc->GetAttrMap());
}
// TODO(tonyyang-svail): how to fill empty grad?
// TODO(tonyyang-svail): Sum var grad is necessary
}
backward_tape_->Forward();
has_been_backwarded_ = true;
}
Tape &get_global_tape() {
static Tape T;
return T;
}
void reset_global_tape() { get_global_tape() = Tape(); }
}
}
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "paddle/fluid/framework/operator.h" // framework::kGradVarSuffix
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/variable.h"
namespace paddle {
namespace tape {
class Variable;
using VariableHandle = std::shared_ptr<Variable>;
/*
* Combination of
* framework::VarDesc desc_;
* framework::Variable var_;
*/
class Variable {
public:
Variable(const std::string pre_fix)
: desc_(pre_fix + std::to_string(count())) {}
Variable(const std::string pre_fix, bool is_grad)
: desc_(pre_fix + (is_grad ? framework::kGradVarSuffix
: std::to_string(count()))) {}
~Variable() { LOG(INFO) << "Deleting " << Name(); }
// Instantiate LoDTensor/SelectedRow
void InitializeVariable();
VariableHandle Grad() {
if (grad_.expired()) {
VariableHandle new_grad(new Variable(desc_.Name(), true));
grad_ = new_grad;
return new_grad;
} else {
return VariableHandle(grad_);
}
}
// Stochastic Gradient Descent with Momentum
// VariableHandle Momentum ();
// void init(const std::string& initializer,
// const framework::AttributeMap& attrs);
// void value() {};
const framework::VarDesc& Desc() const { return desc_; }
framework::VarDesc* MutableDesc() { return &desc_; }
// TODO(tonyyang-svail): No need to expose name
std::string Name() const { return desc_.Name(); }
framework::Variable* Var() { return &var_; }
private:
int count() {
static int counter = 0;
return counter++;
}
framework::VarDesc desc_;
framework::Variable var_;
std::weak_ptr<Variable> grad_;
};
}
}
......@@ -295,13 +295,14 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
const ProgramDesc& program, int block_id) {
auto* ctx = new ExecutorPrepareContext(program, block_id);
std::unique_ptr<ExecutorPrepareContext> ctx(
new ExecutorPrepareContext(program, block_id));
PADDLE_ENFORCE_LT(static_cast<size_t>(block_id), program.Size());
auto& block = program.Block(block_id);
for (auto& op_desc : block.AllOps()) {
ctx->ops_.push_back(OpRegistry::CreateOp(*op_desc));
}
return std::unique_ptr<ExecutorPrepareContext>(ctx);
return ctx;
}
std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
......
set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init)
cc_library(analysis SRCS dot.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc fluid_to_data_flow_graph_pass.cc
DEPS paddle_fluid)
cc_library(analysis SRCS pass_manager.cc dot.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc
fluid_to_data_flow_graph_pass.cc
data_flow_graph_to_fluid_pass.cc
tensorrt_subgraph_pass.cc
dfg_graphviz_draw_pass.cc
DEPS framework_proto)
cc_test(test_node SRCS node_tester.cc DEPS analysis)
cc_test(test_dot SRCS dot_tester.cc DEPS analysis)
set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests)
cc_test(test_data_flow_graph SRCS data_flow_graph_tester.cc DEPS analysis ${FLUID_CORE_MODULES} paddle_fluid
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model)
set_tests_properties(test_data_flow_graph PROPERTIES DEPENDS test_word2vec)
function (inference_analysis_test TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS)
cmake_parse_arguments(analysis_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cc_test(test_subgraph_splitter
SRCS subgraph_splitter_tester.cc
DEPS analysis paddle_fluid tensor
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model)
set_tests_properties(test_subgraph_splitter PROPERTIES DEPENDS test_word2vec)
cc_test(${TARGET}
SRCS "${analysis_test_SRCS}"
DEPS analysis
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model --fraction_of_gpu_memory_to_use=0.5)
set_tests_properties(${TARGET} PROPERTIES DEPENDS test_word2vec)
endfunction(inference_analysis_test)
cc_test(test_dfg_graphviz_draw_pass
SRCS dfg_graphviz_draw_pass_tester.cc
DEPS analysis
ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model)
set_tests_properties(test_dfg_graphviz_draw_pass PROPERTIES DEPENDS test_word2vec)
inference_analysis_test(test_data_flow_graph SRCS data_flow_graph_tester.cc)
inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc)
inference_analysis_test(test_fluid_to_data_flow_graph_pass SRCS fluid_to_data_flow_graph_pass_tester.cc)
inference_analysis_test(test_subgraph_splitter SRCS subgraph_splitter_tester.cc)
inference_analysis_test(test_dfg_graphviz_draw_pass SRCS dfg_graphviz_draw_pass_tester.cc)
#inference_analysis_test(test_tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass_tester.cc)
inference_analysis_test(test_pass_manager SRCS pass_manager_tester.cc)
......@@ -12,22 +12,4 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/contrib/tape/variable.h"
namespace paddle {
namespace tape {
void Variable::InitializeVariable() {
LOG(INFO) << "Initialzing " << desc_.Name() << " as " << desc_.GetType();
framework::proto::VarType::Type var_type = desc_.GetType();
if (var_type == framework::proto::VarType::LOD_TENSOR) {
var_.GetMutable<framework::LoDTensor>();
} else if (var_type == framework::proto::VarType::SELECTED_ROWS) {
var_.GetMutable<framework::SelectedRows>();
} else {
PADDLE_THROW("Variable type %d is not in [LOD_TENSOR, SELECTED_ROWS]",
var_type);
}
}
}
}
#include "paddle/fluid/inference/analysis/argument.h"
......@@ -11,54 +11,45 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "paddle/contrib/tape/variable.h"
namespace paddle {
namespace tape {
using VariableHandleMap = std::map<std::string, std::vector<VariableHandle>>;
struct OpHandle {
OpHandle(const std::string &type,
const VariableHandleMap &in_vars,
const VariableHandleMap &out_vars,
const framework::AttributeMap &attrs)
: type_(type), inputs_(in_vars), outputs_(out_vars), attrs_(attrs) {}
/*
* This file defines the class Argument, which is the input and output of the
* analysis module. All the fields that needed either by Passes or PassManagers
* are contained in Argument.
*
* TODO(Superjomn) Find some way better to contain the fields when it grow too
* big.
*/
std::string type_;
VariableHandleMap inputs_;
VariableHandleMap outputs_;
framework::AttributeMap attrs_;
};
class Tape {
public:
void AddOp(const std::string &type,
const VariableHandleMap &in_vars,
VariableHandleMap out_vars,
const framework::AttributeMap &attrs);
void Forward();
void Backward(VariableHandle target);
bool HasBeenBackwarded() { return has_been_backwarded_; }
#pragma once
private:
bool has_been_backwarded_ = false;
size_t current_position_ = 0;
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
std::vector<OpHandle> tape_;
std::shared_ptr<Tape> backward_tape_;
namespace paddle {
namespace inference {
namespace analysis {
/*
* The argument definition of both Pass and PassManagers.
*
* All the fields should be registered here for clearness.
*/
struct Argument {
// The graph that process by the Passes or PassManagers.
std::unique_ptr<DataFlowGraph> main_dfg;
// The original program desc.
std::unique_ptr<framework::proto::ProgramDesc> origin_program_desc;
};
Tape &get_global_tape();
#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
#define ANALYSIS_ARGUMENT_CHECK_FIELD(field__) \
if (UNLIKELY(!(field__))) { \
LOG(ERROR) << "field " << #field__ << " should be set."; \
return false; \
}
void reset_global_tape();
}
}
} // namespace analysis
} // namespace inference
} // namespace paddle
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/dot.h"
#include "paddle/fluid/inference/analysis/node.h"
namespace paddle {
namespace inference {
......@@ -57,19 +58,7 @@ std::string DataFlowGraph::DotString() const {
// Add nodes
for (size_t i = 0; i < nodes.size(); i++) {
const Node &node = nodes.Get(i);
switch (node.type()) {
case Node::Type::kValue:
dot.AddNode(node.repr(), node.dot_attrs());
break;
case Node::Type::kFunction:
dot.AddNode(node.repr(), node.dot_attrs());
break;
case Node::Type::kFunctionBlock:
dot.AddNode(node.repr(), node.dot_attrs());
break;
default:
PADDLE_THROW("unsupported Node type %d", static_cast<int>(node.type()));
}
dot.AddNode(node.repr(), node.dot_attrs());
}
// Add edges
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include "paddle/fluid/framework/proto_desc.h"
namespace paddle {
namespace inference {
namespace analysis {
bool DataFlowGraphToFluidPass::Initialize(Argument* argument) {
ANALYSIS_ARGUMENT_CHECK_FIELD(argument)
ANALYSIS_ARGUMENT_CHECK_FIELD(argument->origin_program_desc)
desc_ = argument->origin_program_desc.get();
// Here some logic from program_desc.cc and will not add new interfaces into
// framework::ProgramDesc class, use some UT to assure the correctness.
auto* block = desc_->mutable_blocks()->Add();
block->set_idx(framework::kRootBlockIndex);
block->set_parent_idx(framework::kNoneBlockIndex);
return true;
}
bool DataFlowGraphToFluidPass::Finalize() { return true; }
void DataFlowGraphToFluidPass::Run(DataFlowGraph* graph) {
auto traits = GraphTraits<DataFlowGraph>(graph);
for (auto it = traits.nodes().begin(); it != traits.nodes().end(); ++it) {
if (it->deleted()) continue;
switch (it->type()) {
case Node::Type::kFunction:
LOG(INFO) << "add function " << it->name();
AddFluidOp(&(*it));
break;
case Node::Type::kFunctionBlock:
AddEngineOp(&(*it));
break;
default:
continue;
}
}
}
void DataFlowGraphToFluidPass::AddFluidOp(Node* node) {
LOG(INFO) << "processing func " << node->name();
auto* ori_op = static_cast<framework::proto::OpDesc*>(node->pb_desc());
// currently only the main block is analyzed.
auto* main_block = desc_->mutable_blocks(framework::kRootBlockIndex);
auto* op = main_block->add_ops();
LOG(INFO) << "to copy the op";
*op = *ori_op; // copy the attributes, by default, these will not be changed
// by analysis phrase.
// The inputs and outputs of the existing ops are not changed by tensorrt
// subgraph pass.
// NOTE It might be changed by other passes in the long run.
}
void DataFlowGraphToFluidPass::AddEngineOp(Node* node) {
// auto* ori_op = static_cast<framework::proto::OpDesc*>(node->extra_info());
// auto* main_block = desc_->mutable_blocks(framework::kRootBlockIndex);
// auto* op = main_block->add_ops();
// TODO(Superjomn) Here need to expose some arguments for default setting.
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file implements the transformation from fluid ProgramDesc to data flow
* graph.
*/
#pragma once
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/pass.h"
namespace paddle {
namespace inference {
namespace analysis {
class DataFlowGraphToFluidPass final : public DataFlowGraphPass {
public:
DataFlowGraphToFluidPass() = default;
bool Initialize(Argument *argument) override;
bool Finalize() override;
void Run(DataFlowGraph *graph) override;
std::string repr() const override { return "DFG to fluid"; }
std::string description() const override {
return "Transform a DFG to a Fluid ProgramDesc";
}
Pass *CreatePrinterPass(std::ostream &os,
const std::string &banner) const override {
return nullptr;
}
protected:
// Add a Fluid Op into the ProgramDesc.
void AddFluidOp(Node *node);
// Add a EngineOp into the ProgramDesc.
void AddEngineOp(Node *node);
private:
framework::proto::ProgramDesc *desc_;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
......@@ -27,13 +27,12 @@ namespace inference {
namespace analysis {
TEST_F(DFG_Tester, Test) {
framework::proto::ProgramDesc new_desc;
DataFlowGraph graph;
FluidToDataFlowGraphPass pass0;
DataFlowGraphToFluidPass pass1;
pass0.Initialize(desc);
pass1.Initialize(&new_desc);
ASSERT_TRUE(pass0.Initialize(&argument));
ASSERT_TRUE(pass1.Initialize(&argument));
pass0.Run(&graph);
pass1.Run(&graph);
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
namespace paddle {
namespace inference {
namespace analysis {
void DFG_GraphvizDrawPass::Run(DataFlowGraph *graph) {
auto content = Draw(graph);
std::ofstream file(GenDotPath());
file.write(content.c_str(), content.size());
file.close();
LOG(INFO) << "draw dot to " << GenDotPath();
}
std::string DFG_GraphvizDrawPass::Draw(DataFlowGraph *graph) {
Dot dot;
// Add nodes
for (size_t i = 0; i < graph->nodes.size(); i++) {
const Node &node = graph->nodes.Get(i);
if (config_.display_deleted_node || !node.deleted()) {
dot.AddNode(node.repr(), node.dot_attrs());
}
}
// Add edges
for (size_t i = 0; i < graph->nodes.size(); i++) {
const Node &node = graph->nodes.Get(i);
if (!config_.display_deleted_node && node.deleted()) continue;
for (auto &in : node.inlinks) {
if (!config_.display_deleted_node && in->deleted()) continue;
for (auto &in : node.inlinks) {
dot.AddEdge(in->repr(), node.repr(), {});
}
}
}
return dot.Build();
}
} // namespace analysis
} // namespace inference
} // namespace paddle
......@@ -21,6 +21,7 @@ limitations under the License. */
#include <fstream>
#include <string>
#include "paddle/fluid/inference/analysis/dot.h"
#include "paddle/fluid/inference/analysis/pass.h"
namespace paddle {
......@@ -32,35 +33,39 @@ namespace analysis {
*/
class DFG_GraphvizDrawPass : public DataFlowGraphPass {
public:
DFG_GraphvizDrawPass(const std::string& dir, const std::string& id)
: dir_(dir), id_(id) {}
bool Initialize() override { return Pass::Initialize(); }
void Run(DataFlowGraph* graph) override {
auto content = Draw(graph);
std::ofstream file(GenDotPath());
file.write(content.c_str(), content.size());
file.close();
LOG(INFO) << "draw dot to " << GenDotPath();
}
struct Config {
Config(const std::string &dir, const std::string &id,
bool display_deleted_node = false)
: dir(dir), id(id), display_deleted_node(display_deleted_node) {}
// The directory to store the .dot or .png files.
const std::string dir;
// The identifier for this dot file.
const std::string id;
// Whether to display deleted nodes, default false.
const bool display_deleted_node;
};
DFG_GraphvizDrawPass(const Config &config) : config_(config) {}
bool Initialize(Argument *argument) override { return true; }
void Run(DataFlowGraph *graph) override;
bool Finalize() override { return Pass::Finalize(); }
Pass* CreatePrinterPass(std::ostream& os,
const std::string& banner) const override {
return nullptr;
std::string repr() const override { return "DFG graphviz drawer"; }
std::string description() const override {
return "Debug a DFG by draw with graphviz";
}
private:
// Path of the dot file to output.
std::string GenDotPath() const {
return dir_ + "/" + "graph_" + id_ + ".dot";
return config_.dir + "/" + "graph_" + config_.id + ".dot";
}
std::string Draw(DataFlowGraph* graph) { return graph->DotString(); }
std::string Draw(DataFlowGraph *graph);
std::string dir_;
std::string id_;
Config config_;
};
} // namespace analysis
......
......@@ -24,9 +24,10 @@ namespace inference {
namespace analysis {
TEST_F(DFG_Tester, dfg_graphviz_draw_pass_tester) {
auto dfg = ProgramDescToDFG(desc);
DFG_GraphvizDrawPass pass("./", "test");
pass.Initialize();
auto dfg = ProgramDescToDFG(*argument.origin_program_desc);
DFG_GraphvizDrawPass::Config config("./", "test");
DFG_GraphvizDrawPass pass(config);
pass.Initialize(&argument);
pass.Run(&dfg);
// test content
......@@ -38,7 +39,8 @@ TEST_F(DFG_Tester, dfg_graphviz_draw_pass_tester) {
while (std::getline(file, line)) {
no++;
}
ASSERT_EQ(no, 82);
// DFG is sensitive to ProgramDesc, be careful to change the existing models.
ASSERT_EQ(no, 112);
}
} // namespace analysis
......
......@@ -21,19 +21,23 @@ namespace paddle {
namespace inference {
namespace analysis {
FluidToDataFlowGraphPass::FluidToDataFlowGraphPass() {}
bool FluidToDataFlowGraphPass::Initialize() { return Pass::Initialize(); }
bool FluidToDataFlowGraphPass::Initialize(
const framework::proto::ProgramDesc &desc) {
desc_ = &desc;
bool FluidToDataFlowGraphPass::Initialize(Argument *argument) {
ANALYSIS_ARGUMENT_CHECK_FIELD(argument);
ANALYSIS_ARGUMENT_CHECK_FIELD(argument->origin_program_desc);
PADDLE_ENFORCE(argument);
if (!argument->main_dfg) {
LOG(INFO) << "Init DFG";
argument->main_dfg.reset(new DataFlowGraph);
}
desc_ = argument->origin_program_desc.get();
return true;
}
bool FluidToDataFlowGraphPass::Finalize() { return Pass::Finalize(); }
void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
PADDLE_ENFORCE(graph);
PADDLE_ENFORCE(desc_);
// insert vars
std::unordered_map<std::string, size_t> var2id;
auto &main_block = desc_->blocks(framework::kRootBlockIndex);
......@@ -41,7 +45,7 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
const auto &var = main_block.vars(i);
auto *v = graph->nodes.Create(Node::Type::kValue);
v->SetName(var.name());
v->SetExtraInfo(const_cast<void *>(static_cast<const void *>(&var)));
v->SetPbDesc(const_cast<void *>(static_cast<const void *>(&var)));
var2id[var.name()] = v->id();
}
for (int i = 0; i < main_block.ops_size(); i++) {
......@@ -51,7 +55,7 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) {
static_cast<Function *>(o)->SetFuncType(op.type());
// Link to the original protobuf message's memory, make it easier to
// generate from a data flow graph to fluid ProgramDesc.
o->SetExtraInfo(const_cast<void *>(static_cast<const void *>(&op)));
o->SetPbDesc(const_cast<void *>(static_cast<const void *>(&op)));
// set inputs and outputs
// TODO(Superjomn) make sure the InputNames is the real variable name.
for (int j = 0; j < op.inputs_size(); j++) {
......
......@@ -34,13 +34,18 @@ namespace analysis {
*/
class FluidToDataFlowGraphPass final : public DataFlowGraphPass {
public:
FluidToDataFlowGraphPass();
bool Initialize() override;
bool Initialize(const framework::proto::ProgramDesc &desc) override;
FluidToDataFlowGraphPass() = default;
bool Initialize(Argument *argument) override;
bool Finalize() override;
void Run(DataFlowGraph *graph) override;
std::string repr() const override { return "fluid-to-data-flow-graph"; }
std::string description() const override {
return "transform a fluid ProgramDesc to a data flow graph.";
}
Pass *CreatePrinterPass(std::ostream &os,
const std::string &banner) const override;
......
......@@ -23,11 +23,11 @@ namespace analysis {
TEST_F(DFG_Tester, Init) {
FluidToDataFlowGraphPass pass;
pass.Initialize();
pass.Initialize(desc);
pass.Initialize(&argument);
DataFlowGraph graph;
pass.Run(&graph);
ASSERT_GT(graph.nodes.size(), 0);
// Analysis is sensitive to ProgramDesc, careful to change the original model.
ASSERT_EQ(graph.nodes.size(), 37);
pass.Finalize();
LOG(INFO) << '\n' << graph.DotString();
}
......
......@@ -62,6 +62,7 @@ struct DataTypeNamer {
SET_TYPE(int);
SET_TYPE(bool);
SET_TYPE(float);
SET_TYPE(void *);
}
std::unordered_map<decltype(typeid(int).hash_code()), // NOLINT
......
......@@ -40,6 +40,9 @@ Node *NodeMap::Create(Node::Type type) {
case Node::Type::kValue:
nodes_.emplace_back(new Value);
break;
case Node::Type::kFunctionBlock:
nodes_.emplace_back(new FunctionBlock);
break;
default:
PADDLE_THROW("Not supported node type.");
}
......
......@@ -71,12 +71,17 @@ class Node {
// Get an additional attribute and convert it to T data type. NOTE this will
// silently create a new attribute if not exists.
Attr &attr(const std::string &name) { return attrs_[name]; }
Attr &attr(const std::string &name) const { return attrs_[name]; }
int id() const { return id_; }
bool deleted() const { return deleted_; }
// The Protobuf description is set/get with a void* to decouple Node interface
// from a specific kind of Protobuf message.
void SetPbDesc(void *pb) { attr("pb_desc").Pointer() = pb; }
void *pb_desc() const { return attr("pb_desc").Pointer(); }
void SetDeleted() { deleted_ = true; }
bool deleted() const { return deleted_; }
void SetName(const std::string &name) { name_ = name; }
const std::string &name() const { return name_; }
......@@ -84,29 +89,25 @@ class Node {
void SetType(Type type) { type_ = type; }
Type type() const { return type_; }
void *extra_info() const { return extra_info_; }
void SetExtraInfo(void *extra_info) { extra_info_ = extra_info; }
// Input links.
std::vector<Node *> inlinks;
// Output links.
std::vector<Node *> outlinks;
// A helper class to maintain the status from Pass.
// TODO(superjomn) add a checker here to ensure the T is primary.
struct Attr {
// NOTE T should be a primary type or a struct combined by several primary
// types.
// NOTE the STL containers should not use here.
// Some usages
// Attr attr;
// T data;
// attr.data.assign((char*)data, sizeof(data));
// Attr attr;
// attr.Bool() = true;
bool &Bool() { return As<bool>(); }
float &Float() { return As<float>(); }
int32_t &Int32() { return As<int32_t>(); }
int64_t &Int64() { return As<int64_t>(); }
void *&Pointer() { return As<void *>(); }
private:
template <typename T>
......@@ -130,6 +131,7 @@ class Node {
size_t type_hash_{std::numeric_limits<size_t>::max()};
};
// Type checks.
bool IsFunction() const { return type_ == Node::Type::kFunction; }
bool IsValue() const { return type_ == Node::Type::kValue; }
bool IsFunctionBlock() const { return type_ == Node::Type::kFunctionBlock; }
......@@ -148,9 +150,6 @@ class Node {
Type type_{Type::kNone};
// Mark this node is deleted by some pass.
bool deleted_{false};
void *extra_info_;
mutable std::unordered_map<std::string, Attr> attrs_;
};
......
......@@ -19,6 +19,7 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/node.h"
......@@ -30,19 +31,24 @@ namespace analysis {
class Pass {
public:
Pass() = default;
virtual ~Pass() {}
virtual ~Pass() = default;
// Virtual method overridden by subclasses to do only necessary initialization
// before any pass is run.
virtual bool Initialize() { return false; }
// virtual bool Initialize() { return false; }
// There is some passes such as FlowToDataFlowGraphPass that needs a
// ProgramDesc. Here use the native ProgramDesc ProtoBuf message, so that it
// only couple with the proto file.
virtual bool Initialize(const framework::proto::ProgramDesc &desc) {
return false;
}
// virtual bool Initialize(const framework::proto::ProgramDesc &desc) { return
// false; }
// There are some Passes such as DataFlowGraphToFluidPass that will output a
// ProgramDesc.
virtual bool Initialize(framework::proto::ProgramDesc *desc) { return false; }
// virtual bool Initialize(framework::proto::ProgramDesc *desc) { return
// false; }
// Mutable Pass.
virtual bool Initialize(Argument *argument) { return false; }
// Readonly Pass.
virtual bool Initialize(const Argument &argument) { return false; }
// Virtual method overriden by subclasses to do any necessary clean up after
// all passes have run.
......@@ -50,7 +56,9 @@ class Pass {
// Get a Pass appropriate to print the Node this pass operates on.
virtual Pass *CreatePrinterPass(std::ostream &os,
const std::string &banner) const = 0;
const std::string &banner) const {
return nullptr;
}
// Run on a single Node.
virtual void Run(Node *x) { LOG(FATAL) << "not valid"; }
......@@ -60,6 +68,11 @@ class Pass {
virtual void Run(FunctionBlock *x) { LOG(FATAL) << "not valid"; }
// Run on a single DataFlowGraph.
virtual void Run(DataFlowGraph *x) { LOG(FATAL) << "not valid"; }
// Human-readable short representation.
virtual std::string repr() const = 0;
// Human-readable long description.
virtual std::string description() const = 0;
};
// NodePass process on any Node types.
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/pass_manager.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
namespace paddle {
namespace inference {
namespace analysis {
void DfgPassManager::RunAll() {
PADDLE_ENFORCE(argument_);
for (auto& pass : data_) {
VLOG(4) << "Running pass [" << pass->repr() << "]";
pass->Run(argument_->main_dfg.get());
}
}
void NodePassManager::RunAll() {
PADDLE_ENFORCE(argument_);
PADDLE_ENFORCE(argument_->main_dfg.get());
auto trait =
GraphTraits<DataFlowGraph>(argument_->main_dfg.get()).nodes_in_DFS();
for (auto& node : trait) {
for (auto& pass : data_) {
pass->Run(&node);
}
}
}
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file defines the logic of pass management. The analysis for inference is
* a pipeline of Passes, a PassManager is a agency that helps to manage the
* executation of the Passes.
*
* There are two modes of Passes, the first one is called NodePass and takes
* an Node as input and output; the second one is called DFGPass and takes a
* DFG(Data Flow Graph) as input and output. It is hard to put all the passes in
* the same pipeline, there are two kinds of PassManagers, both takes a DFG as
* input and output a DFG, but the Passes inside are different:
*
* 1. NodePassManager: the passes inside are all NodePasses, it can have
* different graph trivial algorithm, for example, DFS_NodePassManager will
* trigger the passes in depth first order;
* 2. DfgPassManager: the passes inside are all DfgPasses.
*/
#pragma once
#include <string>
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/analysis/pass.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
* PassManager is the base class for all pass managers, a pass manager has
* several Pass-es registered, and execute them in the linear order.
*/
class PassManager : public OrderedRegistry<Pass> {
public:
PassManager() = default;
// Call all the passes' Initialize methods. The desc and data_flow_graph are
// globally shared, so pass them as the arguemnts for all the pass managers.
virtual bool Initialize(const Argument& argument) { return false; }
virtual bool Initialize(Argument* argument) {
argument_ = argument;
for (auto& pass : data_) {
LOG(INFO) << "Initializing pass " << pass->repr();
if (!pass->Initialize(argument)) {
LOG(ERROR) << "Failed to initialize pass [" << pass->repr() << "]";
return false;
}
}
return true;
}
// Call all the passes' Finalize methods.
virtual bool Finalize() {
for (auto& pass : data_) {
if (!pass->Finalize()) {
LOG(ERROR) << "Failed to finalize pass [" << pass->repr() << "]";
return false;
}
}
return true;
}
// Run all the passes.
virtual void RunAll() = 0;
// Short identifier.
virtual std::string repr() const = 0;
// Long description.
virtual std::string description() const = 0;
virtual ~PassManager() = default;
protected:
Argument* argument_{nullptr};
};
/*
* A pass manager that process a DFG.
*/
class DfgPassManager : public PassManager {
public:
DfgPassManager() = default;
void RunAll() override;
virtual ~DfgPassManager() = default;
};
/*
* A pass manager that process a Node each time.
*/
class NodePassManager : public PassManager {
public:
NodePassManager() = default;
void RunAll() override;
virtual ~NodePassManager() = default;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/pass_manager.h"
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include <gtest/gtest.h>
namespace paddle {
namespace inference {
namespace analysis {
class TestDfgPassManager final : public DfgPassManager {
public:
TestDfgPassManager() = default;
virtual ~TestDfgPassManager() = default;
// Short identifier.
std::string repr() const override { return "test-pass-manager"; }
// Long description.
std::string description() const override { return "test doc"; }
};
class TestNodePassManager final : public NodePassManager {
public:
virtual ~TestNodePassManager() = default;
std::string repr() const override { return "test-node-pass-manager"; }
std::string description() const override { return "test doc"; }
};
class TestNodePass final : public NodePass {
public:
virtual ~TestNodePass() = default;
bool Initialize(Argument* argument) override { return true; }
void Run(Node* node) override {
LOG(INFO) << "- Processing node " << node->repr();
}
std::string repr() const override { return "test-node"; }
std::string description() const override { return "some doc"; }
};
TEST_F(DFG_Tester, DFG_pass_manager) {
TestDfgPassManager manager;
DFG_GraphvizDrawPass::Config config("./", "dfg.dot");
manager.Register("fluid-to-flow-graph", new FluidToDataFlowGraphPass);
manager.Register("graphviz", new DFG_GraphvizDrawPass(config));
manager.Register("dfg-to-fluid", new DataFlowGraphToFluidPass);
ASSERT_TRUE(manager.Initialize(&argument));
manager.RunAll();
}
TEST_F(DFG_Tester, Node_pass_manager) {
// Pre-process: initialize the DFG with the ProgramDesc first.
FluidToDataFlowGraphPass pass0;
pass0.Initialize(&argument);
pass0.Run(argument.main_dfg.get());
TestNodePassManager manager;
manager.Register("test-node-pass", new TestNodePass);
ASSERT_TRUE(manager.Initialize(&argument));
manager.RunAll();
}
} // namespace analysis
} // namespace inference
} // namespace paddle
......@@ -19,22 +19,23 @@ namespace paddle {
namespace inference {
namespace analysis {
SubGraphSplitter::NodeInsideSubgraphTeller teller = [](const Node* node) {
if (node->type() != Node::Type::kFunction) return false;
const auto* func = static_cast<const Function*>(node);
if (func->func_type() == "elementwise_add" || func->func_type() == "relu" ||
func->func_type() == "conv2d" || func->func_type() == "mul" ||
func->func_type() == "sigmoid" || func->func_type() == "softmax") {
LOG(INFO) << "sub-graph marked " << node->repr();
return true;
}
return false;
};
TEST_F(DFG_Tester, Split) {
auto desc = LoadProgramDesc();
auto dfg = ProgramDescToDFG(desc);
LOG(INFO) << "spliter\n" << dfg.DotString();
SubGraphSplitter::NodeInsideSubgraphTeller teller = [](const Node* node) {
if (node->type() != Node::Type::kFunction) return false;
const auto* func = static_cast<const Function*>(node);
if (func->func_type() == "elementwise_add" || func->func_type() == "relu" ||
func->func_type() == "conv2d" || func->func_type() == "mul" ||
func->func_type() == "sigmoid" || func->func_type() == "softmax") {
LOG(INFO) << "sub-graph marked " << node->repr();
return true;
}
return false;
};
ASSERT_GT(dfg.nodes.size(), 5UL);
auto subgraphs = SubGraphSplitter(&dfg, teller)();
......@@ -62,6 +63,28 @@ TEST_F(DFG_Tester, Split) {
ASSERT_EQ(subgraphs.back().size(), 6UL);
}
TEST_F(DFG_Tester, Fuse) {
auto desc = LoadProgramDesc();
auto dfg = ProgramDescToDFG(desc);
size_t count0 = dfg.nodes.size();
SubGraphFuse fuse(&dfg, teller);
fuse();
int count1 = 0;
for (auto& node : dfg.nodes.nodes()) {
if (node->deleted()) {
LOG(INFO) << "deleted " << node->repr();
}
count1 += node->deleted();
}
// At least one nodes should be deleted.
ASSERT_EQ(dfg.nodes.size(), count0 + 1); // added a new FunctionBlock
ASSERT_EQ(6UL, count1);
}
} // namespace analysis
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,50 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "gtest/gtest.h"
#include "paddle/contrib/tape/function.h"
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h"
#include "paddle/fluid/inference/analysis/subgraph_splitter.h"
using namespace paddle::tape;
namespace paddle {
namespace inference {
namespace analysis {
TEST(Tape, TestMLP) {
LOG(INFO) << "TestMLP";
Linear linear1(3, 3, "relu");
Linear linear2(3, 3, "relu");
Mean mean;
TensorRTSubGraphPass::TensorRTSubGraphPass(
const TensorRTSubGraphPass::NodeInsideSubgraphTeller &teller)
: node_inside_subgraph_teller_(teller) {}
SGD sgd(0.001);
std::string initializer = "fill_constant";
paddle::framework::AttributeMap attrs;
attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32;
attrs["shape"] = std::vector<int>{3, 3};
attrs["value"] = 1.0f;
Fill filler(initializer, attrs);
for (int i = 0; i < 2; ++i) {
reset_global_tape();
VariableHandle input(new Variable("input"));
filler(input);
auto loss = mean(linear2(linear1(input)));
get_global_tape().Backward(loss);
for (auto w : linear1.Params()) {
sgd(w);
}
for (auto w : linear2.Params()) {
sgd(w);
}
}
void TensorRTSubGraphPass::Run(DataFlowGraph *graph) {
SubGraphFuse(graph, node_inside_subgraph_teller_);
}
int main(int argc, char** argv) {
std::vector<paddle::platform::Place> places;
places.emplace_back(paddle::platform::CPUPlace());
paddle::platform::DeviceContextPool::Init(places);
} // analysis
} // inference
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
} // paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/inference/analysis/node.h"
#include "paddle/fluid/inference/analysis/pass.h"
#include "paddle/fluid/inference/analysis/subgraph_splitter.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
* Parse the graph and replace TensorRT supported nodes with SubGraphNode
*/
class TensorRTSubGraphPass : public DataFlowGraphPass {
public:
// Tell whether to transform a sub-graph into TensorRT.
using NodeInsideSubgraphTeller = SubGraphFuse::NodeInsideSubgraphTeller;
TensorRTSubGraphPass(const NodeInsideSubgraphTeller& teller);
bool Initialize(Argument* argument) override { return true; }
// This class get a sub-graph as input and determine whether to transform this
// sub-graph into TensorRT.
void Run(DataFlowGraph* graph) override;
private:
NodeInsideSubgraphTeller node_inside_subgraph_teller_;
};
} // namespace analysis
} // namespace inference
} // paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h"
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
DEFINE_string(model_dir, "", "inference test model dir");
TEST(TensorRTSubGraph, single_pass) {
auto desc = LoadProgramDesc();
auto dfg = ProgramDescToDFG(desc);
SubGraphSplitter::NodeInsideSubgraphTeller teller = [](const Node* node) {
if (node->type() != Node::Type::kFunction) return false;
const auto* func = static_cast<const Function*>(node);
if (func->func_type() == "elementwise_add" || func->func_type() == "relu" ||
func->func_type() == "conv2d" || func->func_type() == "mul" ||
func->func_type() == "sigmoid" || func->func_type() == "softmax") {
LOG(INFO) << "sub-graph marked " << node->repr();
return true;
}
return false;
};
DFG_GraphvizDrawPass::Config config{"./", "test"};
DFG_GraphvizDrawPass dfg_pass(config);
dfg_pass.Initialize();
DFG_GraphvizDrawPass dfg_pass1(config);
dfg_pass1.Initialize();
dfg_pass.Run(&dfg);
TensorRTSubGraphPass trt_pass(std::move(teller));
trt_pass.Initialize();
trt_pass.Run(&dfg);
dfg_pass1.Run(&dfg);
// Check the TRT op's block desc
for (auto node : dfg.nodes.nodes()) {
if (node->IsFunctionBlock()) {
}
}
}
TEST(TensorRTSubGraph, pass_manager) {}
} // namespace analysis
} // namespace inference
} // namespace paddle
......@@ -15,33 +15,46 @@ limitations under the License. */
#pragma once
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <fstream>
#include <string>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include "paddle/fluid/inference/analysis/ut_helper.h"
#include "paddle/fluid/inference/io.h"
namespace paddle {
namespace inference {
// Read ProgramDesc from a __model__ file, defined in io.cc
extern void ReadBinaryFile(const std::string& filename, std::string* contents);
namespace analysis {
DEFINE_string(inference_model_dir, "", "inference test model dir");
static framework::proto::ProgramDesc LoadProgramDesc(
const std::string& model_dir = FLAGS_inference_model_dir) {
paddle::platform::CPUPlace place;
paddle::framework::Executor executor(place);
paddle::framework::Scope scope;
auto program = Load(&executor, &scope, model_dir);
return *program->Proto();
std::string msg;
std::string net_file = FLAGS_inference_model_dir + "/__model__";
std::ifstream fin(net_file, std::ios::in | std::ios::binary);
PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot open file %s", net_file);
fin.seekg(0, std::ios::end);
msg.resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(msg.at(0)), msg.size());
fin.close();
framework::proto::ProgramDesc program_desc;
program_desc.ParseFromString(msg);
return program_desc;
}
static DataFlowGraph ProgramDescToDFG(
const framework::proto::ProgramDesc& desc) {
DataFlowGraph graph;
FluidToDataFlowGraphPass pass;
pass.Initialize(desc);
Argument argument;
argument.origin_program_desc.reset(new framework::proto::ProgramDesc(desc));
pass.Initialize(&argument);
pass.Run(&graph);
pass.Finalize();
return graph;
......@@ -49,9 +62,12 @@ static DataFlowGraph ProgramDescToDFG(
class DFG_Tester : public ::testing::Test {
protected:
void SetUp() override { desc = LoadProgramDesc(FLAGS_inference_model_dir); }
void SetUp() override {
auto desc = LoadProgramDesc(FLAGS_inference_model_dir);
argument.origin_program_desc.reset(new framework::proto::ProgramDesc(desc));
}
framework::proto::ProgramDesc desc;
Argument argument;
};
} // namespace analysis
......
......@@ -12,16 +12,20 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "mkldnn.hpp"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/operators/mkldnn_activation_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace operators {
using paddle::framework::Tensor;
using paddle::platform::MKLDNNDeviceContext;
using framework::DataLayout;
using framework::Tensor;
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::stream;
using platform::GetMKLDNNFormat;
using platform::MKLDNNDeviceContext;
using platform::to_void_cast;
namespace {
std::string gethash(const mkldnn::memory::dims &operand_dims,
......@@ -35,188 +39,260 @@ std::string gethash(const mkldnn::memory::dims &operand_dims,
};
return dim2str(operand_dims) + std::to_string(algorithm);
}
} // namespace
template <typename Functor>
class MKLDNNActivationKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
const auto *x = ctx.Input<Tensor>("X");
PADDLE_ENFORCE(x->layout() == DataLayout::kMKLDNN &&
x->format() != memory::format::format_undef,
"Wrong layout/format set for Input x tensor");
Functor functor;
auto attrs = functor.GetAttrs();
for (auto &attr : attrs) {
*attr.second = ctx.Attr<float>(attr.first);
}
functor(ctx);
}
};
template <typename T, typename ExecContext>
void eltwise_forward(const ExecContext &ctx, mkldnn::algorithm algorithm,
const T alpha = 0, const T beta = 0) {
template <typename Functor>
class MKLDNNActivationGradKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
const auto *diff_y = ctx.Input<Tensor>(framework::GradVarName("Out"));
PADDLE_ENFORCE(diff_y->layout() == DataLayout::kMKLDNN &&
diff_y->format() != memory::format::format_undef,
"Wrong layout/format set for Input OutGrad tensor");
Functor functor;
auto attrs = functor.GetAttrs();
for (auto &attr : attrs) {
*attr.second = ctx.Attr<float>(attr.first);
}
functor(ctx);
}
};
template <typename T>
void eltwise_forward(const framework::ExecutionContext &ctx,
mkldnn::algorithm algorithm, const T alpha = 0,
const T beta = 0) {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");
auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto &mkldnn_engine = dev_ctx.GetEngine();
// get buffers
const auto *src = ctx.template Input<Tensor>("X");
const auto *src_data = src->template data<T>();
const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Output<Tensor>("Out");
auto *dst = ctx.template Output<Tensor>("Out");
T *dst_data = dst->template mutable_data<T>(ctx.GetPlace());
const T *x_data = x->data<T>();
T *y_data = y->mutable_data<T>(ctx.GetPlace());
// get memory dim
PADDLE_ENFORCE(src->dims().size() == 2 || src->dims().size() == 4,
PADDLE_ENFORCE(x->dims().size() == 2 || x->dims().size() == 4,
"Input dim must be with 2 or 4");
std::vector<int> src_tz = framework::vectorize2int(src->dims());
std::vector<int> src_tz = framework::vectorize2int(x->dims());
auto src_format =
src_tz.size() == 2 ? mkldnn::memory::format::nc : x->format();
const std::string key = gethash(src_tz, algorithm);
const std::string key_src_data =
key + ctx.op().Output("Out") + "@eltwise_fwd_src_data";
const std::string key_src_mem = key + "@eltwise_fwd_src_mem";
const std::string key_dst_mem = key + "@eltwise_fwd_dst_mem";
const std::string key_fwd = key + "@eltwise_fwd";
const std::string key_src_layout =
key + ctx.op().Output("Out") + "@eltwise_fwd_src_layout";
const std::string key_with_layout = key + std::to_string(src_format);
const std::string key_src_mem = key_with_layout + "@eltwise_fwd_src_mem";
const std::string key_dst_mem = key_with_layout + "@eltwise_fwd_dst_mem";
const std::string key_fwd = key_with_layout + "@eltwise_fwd";
const std::string key_fwd_pd = key_with_layout + "@eltwise_fwd_pd";
// save input data and layout to be referred in backward path
auto p_src_data = std::make_shared<const T *>(x_data);
dev_ctx.SetBlob(key_src_data, p_src_data);
auto p_src_layout = std::make_shared<memory::format>(src_format);
dev_ctx.SetBlob(key_src_layout, p_src_layout);
auto p_fwd = std::static_pointer_cast<mkldnn::eltwise_forward>(
dev_ctx.GetBlob(key_fwd));
// save input data to be referred in backward path
auto p_src_data = std::make_shared<const T *>(src_data);
dev_ctx.SetBlob(key_src_data, p_src_data);
std::shared_ptr<memory> dst_memory;
if (p_fwd == nullptr) {
// create memory description
auto data_md = src_tz.size() == 2
? platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32,
mkldnn::memory::format::nc)
: platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32,
mkldnn::memory::format::nchw);
// create memory primitives
auto p_src_mem = std::make_shared<mkldnn::memory>(mkldnn::memory(
{data_md, mkldnn_engine}, platform::to_void_cast(src_data)));
dev_ctx.SetBlob(key_src_mem, p_src_mem);
auto p_dst_mem = std::make_shared<mkldnn::memory>(mkldnn::memory(
{data_md, mkldnn_engine}, platform::to_void_cast(dst_data)));
dev_ctx.SetBlob(key_dst_mem, p_dst_mem);
auto fwd_desc = mkldnn::eltwise_forward::desc(
mkldnn::prop_kind::forward_training, algorithm, data_md, alpha, beta);
auto p_fwd_pd = std::make_shared<mkldnn::eltwise_forward::primitive_desc>(
fwd_desc, mkldnn_engine);
const std::string key_fwd_pd = key + "eltwise_fwd_pd";
dev_ctx.SetBlob(key_fwd_pd, p_fwd_pd);
p_fwd = std::make_shared<mkldnn::eltwise_forward>(
*p_fwd_pd, *(p_src_mem.get()), *(p_dst_mem.get()));
// create mkldnn memory for input X
auto src_md = platform::MKLDNNMemDesc(
src_tz, platform::MKLDNNGetDataType<T>(), src_format);
auto src_memory = std::shared_ptr<memory>(
new memory({src_md, mkldnn_engine}, to_void_cast(x_data)));
// save src_memory to be referred in backward path
dev_ctx.SetBlob(key_src_mem, src_memory);
// create primitive descriptor for activation forward and save it
auto forward_desc = mkldnn::eltwise_forward::desc(
mkldnn::prop_kind::forward_training, algorithm,
src_memory->get_primitive_desc().desc(), alpha, beta);
auto forward_pd = std::make_shared<mkldnn::eltwise_forward::primitive_desc>(
forward_desc, mkldnn_engine);
// save prim desc into global device context to be referred in backward path
dev_ctx.SetBlob(key_fwd_pd, forward_pd);
// create mkldnn memory for output y
dst_memory =
std::make_shared<memory>(forward_pd->dst_primitive_desc(), y_data);
dev_ctx.SetBlob(key_dst_mem, dst_memory);
// create activation primitive
p_fwd = std::make_shared<mkldnn::eltwise_forward>(*forward_pd, *src_memory,
*dst_memory);
dev_ctx.SetBlob(key_fwd, p_fwd);
} else {
// primitives already exist
auto p_src_mem =
auto src_memory =
std::static_pointer_cast<mkldnn::memory>(dev_ctx.GetBlob(key_src_mem));
PADDLE_ENFORCE(p_src_mem != nullptr,
"Fail to find eltwise p_src_mem in device context.");
auto p_dst_mem =
PADDLE_ENFORCE(src_memory != nullptr,
"Fail to find eltwise src_memory in device context.");
dst_memory =
std::static_pointer_cast<mkldnn::memory>(dev_ctx.GetBlob(key_dst_mem));
PADDLE_ENFORCE(p_dst_mem != nullptr,
"Fail to find eltwise p_src_mem in device context.");
PADDLE_ENFORCE(dst_memory != nullptr,
"Fail to find eltwise dst_memory in device context.");
p_src_mem->set_data_handle(platform::to_void_reinterpret_cast(src_data));
p_dst_mem->set_data_handle(dst_data);
src_memory->set_data_handle(platform::to_void_cast(x_data));
dst_memory->set_data_handle(y_data);
}
// push primitive to stream and wait until it's executed
std::vector<mkldnn::primitive> pipeline = {*(p_fwd.get())};
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
std::vector<primitive> pipeline;
pipeline.push_back(*p_fwd);
stream(stream::kind::eager).submit(pipeline).wait();
y->set_layout(DataLayout::kMKLDNN);
y->set_format(GetMKLDNNFormat(*dst_memory));
}
template <typename T, typename ExecContext>
void eltwise_grad(const ExecContext &ctx, mkldnn::algorithm algorithm,
const T alpha = 0, const T beta = 0) {
template <typename T>
void eltwise_grad(const framework::ExecutionContext &ctx,
mkldnn::algorithm algorithm, const T alpha = 0,
const T beta = 0) {
auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto &mkldnn_engine = dev_ctx.GetEngine();
// get buffers
const auto *out = ctx.template Input<Tensor>("Out");
auto *dout = ctx.template Input<Tensor>(framework::GradVarName("Out"));
const auto *diff_dst = dout->template data<T>();
const auto *diff_y = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto *diff_x = ctx.Output<Tensor>(framework::GradVarName("X"));
auto *dx =
ctx.template Output<framework::Tensor>(framework::GradVarName("X"));
const T *diff_src = dx->template mutable_data<T>(ctx.GetPlace());
const T *diff_y_data = diff_y->data<T>();
T *diff_x_data = diff_x->mutable_data<T>(ctx.GetPlace());
// get memory dim
std::vector<int> src_tz = framework::vectorize2int(out->dims());
std::vector<int> diff_dst_tz = framework::vectorize2int(diff_y->dims());
const std::string key = gethash(src_tz, algorithm);
const std::string key_diff_src_mem = key + "@eltwise_diff_src_mem";
const std::string key_diff_dst_mem = key + "@eltwise_diff_dst_mem";
const std::string key_grad = key + "@eltwise_grad";
auto diff_y_format =
diff_dst_tz.size() == 2 ? mkldnn::memory::format::nc : diff_y->format();
const std::string key = gethash(diff_dst_tz, algorithm);
const std::string key_src_data =
key + ctx.op().Input("Out") + "@eltwise_fwd_src_data";
const std::string key_src_layout =
key + ctx.op().Input("Out") + "@eltwise_fwd_src_layout";
const auto p_src_layout =
std::static_pointer_cast<memory::format>(dev_ctx.GetBlob(key_src_layout));
const std::string key_src_mem =
key + std::to_string(*p_src_layout) + "@eltwise_fwd_src_mem";
const std::string key_fwd_pd =
key + std::to_string(*p_src_layout) + "@eltwise_fwd_pd";
const std::string key_with_layouts =
key + std::to_string(*p_src_layout) + "-" + std::to_string(diff_y_format);
const std::string key_diff_src_mem =
key_with_layouts + "@eltwise_diff_src_mem";
const std::string key_diff_dst_mem =
key_with_layouts + "@eltwise_diff_dst_mem";
const std::string key_grad = key_with_layouts + "@eltwise_grad";
const auto p_src_data =
std::static_pointer_cast<T *>(dev_ctx.GetBlob(key_src_data));
const std::string key_src_mem = key + "@eltwise_fwd_src_mem";
auto p_src_mem =
auto src_memory =
std::static_pointer_cast<mkldnn::memory>(dev_ctx.GetBlob(key_src_mem));
p_src_mem->set_data_handle(*p_src_data.get());
PADDLE_ENFORCE(src_memory != nullptr,
"Fail to find src_memory in device context");
src_memory->set_data_handle(*p_src_data.get());
std::shared_ptr<memory> diff_src_memory;
auto p_grad = std::static_pointer_cast<mkldnn::eltwise_forward::primitive>(
auto p_grad = std::static_pointer_cast<mkldnn::eltwise_backward>(
dev_ctx.GetBlob(key_grad));
if (p_grad == nullptr) {
// create memory description
auto data_md = src_tz.size() == 2
? platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32,
mkldnn::memory::format::nc)
: platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32,
mkldnn::memory::format::nchw);
// create memory primitives
std::shared_ptr<void> p_diff_src_mem =
std::make_shared<mkldnn::memory>(mkldnn::memory(
{data_md, mkldnn_engine}, platform::to_void_cast(diff_src)));
dev_ctx.SetBlob(key_diff_src_mem, p_diff_src_mem);
std::shared_ptr<void> p_diff_dst_mem =
std::make_shared<mkldnn::memory>(mkldnn::memory(
{data_md, mkldnn_engine}, platform::to_void_cast(diff_dst)));
dev_ctx.SetBlob(key_diff_dst_mem, p_diff_dst_mem);
auto bwd_desc = mkldnn::eltwise_backward::desc(algorithm, data_md, data_md,
alpha, beta);
const std::string key_fwd_pd = key + "eltwise_fwd_pd";
auto *p_fwd_pd = static_cast<mkldnn::eltwise_forward::primitive_desc *>(
dev_ctx.GetBlob(key_fwd_pd).get());
auto eltwise_bwd_prim_desc = mkldnn::eltwise_backward::primitive_desc(
bwd_desc, mkldnn_engine, *p_fwd_pd);
// create mkldnn memory for input diff_y
auto diff_dst_md = platform::MKLDNNMemDesc(
diff_dst_tz, platform::MKLDNNGetDataType<T>(), diff_y_format);
auto diff_dst_memory = std::shared_ptr<memory>(
new memory({diff_dst_md, mkldnn_engine}, to_void_cast(diff_y_data)));
dev_ctx.SetBlob(key_diff_dst_mem, diff_dst_memory);
// retrieve eltwise primitive desc from device context
auto forward_pd =
std::static_pointer_cast<mkldnn::eltwise_forward::primitive_desc>(
dev_ctx.GetBlob(key_fwd_pd));
PADDLE_ENFORCE(forward_pd != nullptr,
"Fail to find eltwise_fwd_pd in device context");
// ceate primitive descriptor for activation backward
auto backward_desc = mkldnn::eltwise_backward::desc(
algorithm, diff_dst_memory->get_primitive_desc().desc(),
src_memory->get_primitive_desc().desc(), alpha, beta);
auto backward_pd = mkldnn::eltwise_backward::primitive_desc(
backward_desc, mkldnn_engine, *forward_pd);
// create mkldnn memory for output diff_src
diff_src_memory = std::make_shared<memory>(
backward_pd.diff_src_primitive_desc(), diff_x_data);
dev_ctx.SetBlob(key_diff_src_mem, diff_src_memory);
// create activation backward primitive
p_grad = std::make_shared<mkldnn::eltwise_backward>(
eltwise_bwd_prim_desc, *static_cast<mkldnn::memory *>(p_src_mem.get()),
*(static_cast<mkldnn::memory *>(p_diff_dst_mem.get())),
*(static_cast<mkldnn::memory *>(p_diff_src_mem.get())));
backward_pd, *src_memory, *diff_dst_memory, *diff_src_memory);
dev_ctx.SetBlob(key_grad, p_grad);
} else {
// primitives already exist
auto p_diff_src_mem = std::static_pointer_cast<mkldnn::memory>(
diff_src_memory = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(key_diff_src_mem));
auto p_diff_dst_mem = std::static_pointer_cast<mkldnn::memory>(
auto diff_dst_memory = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(key_diff_dst_mem));
p_diff_src_mem->set_data_handle(
platform::to_void_reinterpret_cast(diff_src));
p_diff_dst_mem->set_data_handle(
platform::to_void_reinterpret_cast(diff_dst));
diff_src_memory->set_data_handle(
platform::to_void_reinterpret_cast(diff_x_data));
diff_dst_memory->set_data_handle(
platform::to_void_reinterpret_cast(diff_y_data));
}
// push primitive to stream and wait until it's executed
std::vector<mkldnn::primitive> pipeline = {*(p_grad.get())};
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
std::vector<primitive> pipeline;
pipeline.push_back(*p_grad);
stream(stream::kind::eager).submit(pipeline).wait();
diff_x->set_layout(DataLayout::kMKLDNN);
diff_x->set_format(GetMKLDNNFormat(*diff_src_memory));
}
} // anonymous namespace
template <typename T, mkldnn::algorithm algorithm>
struct MKLDNNActivationFunc : public BaseActivationFunctor<T> {
template <typename ExecContext>
void operator()(const ExecContext &ctx) const {
void operator()(const framework::ExecutionContext &ctx) const {
eltwise_forward<T>(ctx, algorithm);
}
};
template <typename T, mkldnn::algorithm algorithm>
struct MKLDNNActivationGradFunc : public BaseActivationFunctor<T> {
template <typename ExecContext>
void operator()(const ExecContext &ctx) const {
void operator()(const framework::ExecutionContext &ctx) const {
eltwise_grad<T>(ctx, algorithm);
}
};
......
......@@ -19,18 +19,20 @@ limitations under the License. */
namespace paddle {
namespace operators {
#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \
class OP_NAME##OpMaker \
: public ::paddle::framework::OpProtoAndCheckerMaker { \
public: \
void Make() override { \
AddInput("X", "Input of " #OP_NAME " operator"); \
AddOutput("Out", "Output of " #OP_NAME " operator").Reuse("X"); \
AddAttr<bool>("use_mkldnn", \
"(default false) Only used in mkldnn kernel") \
.SetDefault(false); \
AddComment(OP_COMMENT); \
} \
using paddle::framework::Tensor;
#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \
class OP_NAME##OpMaker \
: public ::paddle::framework::OpProtoAndCheckerMaker { \
public: \
void Make() override { \
AddInput("X", "Input of " #OP_NAME " operator"); \
AddOutput("Out", "Output of " #OP_NAME " operator").Reuse("X"); \
AddAttr<bool>("use_mkldnn", \
"(bool, default false) Only used in mkldnn kernel") \
.SetDefault(false); \
AddComment(#OP_COMMENT); \
} \
}
#define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME, KERNEL_TYPE) \
......@@ -58,7 +60,6 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx,
const framework::OperatorWithKernel& oper,
const std::string& name) {
framework::LibraryType library{framework::LibraryType::kPlain};
framework::DataLayout layout = framework::DataLayout::kAnyLayout;
#ifdef PADDLE_WITH_MKLDNN
auto it = oper.Attrs().find("use_mkldnn");
......@@ -82,6 +83,7 @@ class ActivationOp : public framework::OperatorWithKernel {
ctx->ShareLoD("X", /*->*/ "Out");
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return GetKernelType(ctx, *this, "X");
......@@ -96,6 +98,7 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("Out"));
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return GetKernelType(ctx, *this, "Out");
......@@ -140,7 +143,7 @@ $$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
__attribute__((unused)) constexpr char TanhShrinkDoc[] = R"DOC(
TanhShrink Activation Operator.
$$out = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
$$out = x - \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC";
......@@ -382,7 +385,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC(
STanh Activation Operator.
$$out = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
$$out = b * \\frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
)DOC");
}
......
......@@ -60,34 +60,45 @@ template <typename DeviceContext, typename T>
class ConcatGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const {
auto* in = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto* out_grad =
ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto ins = ctx.MultiInput<framework::Tensor>("X");
auto out_var_names = ctx.Outputs(framework::GradVarName("X"));
auto outs = ctx.MultiOutput<framework::Tensor>(framework::GradVarName("X"));
int64_t axis = static_cast<int64_t>(ctx.Attr<int>("axis"));
// get output tensor that the name is not kEmptyVarName
std::vector<framework::Tensor*> outputs;
for (size_t j = 0; j < outs.size(); ++j) {
if (out_var_names[j] != framework::kEmptyVarName) {
outs[j]->mutable_data<T>(ctx.GetPlace());
outputs.push_back(outs[j]);
} else {
outputs.push_back(nullptr);
}
}
// Sometimes direct copies will be faster, this maybe need deeply analysis.
if (axis == 0 && outs.size() < 10) {
size_t input_offset = 0;
auto in_stride = framework::stride_numel(in->dims());
const auto in_stride = framework::stride_numel(out_grad->dims());
for (auto& out : outs) {
out->mutable_data<T>(ctx.GetPlace());
auto out_stride = framework::stride_numel(out->dims());
StridedNumelCopyWithAxis<T>(ctx.device_context(), axis, out->data<T>(),
out_stride, in->data<T>() + input_offset,
in_stride, out_stride[axis]);
for (size_t i = 0; i < outs.size(); ++i) {
auto out_stride = framework::stride_numel(ins[i]->dims());
auto* out = outputs[i];
if (out != nullptr) {
StridedNumelCopyWithAxis<T>(
ctx.device_context(), axis, out->data<T>(), out_stride,
out_grad->data<T>() + input_offset, in_stride, out_stride[axis]);
}
input_offset += out_stride[axis];
}
} else {
std::vector<framework::Tensor> outputs(outs.size());
for (size_t j = 0; j < outs.size(); ++j) {
outs[j]->mutable_data<T>(ctx.GetPlace());
outputs[j] = *outs[j];
}
auto& dev_ctx = ctx.template device_context<DeviceContext>();
paddle::operators::math::ConcatGradFunctor<DeviceContext, T>
concat_grad_functor;
concat_grad_functor(dev_ctx, *in, static_cast<int>(axis), &outputs);
concat_grad_functor(dev_ctx, *out_grad, ins, static_cast<int>(axis),
&outputs);
}
}
};
......
......@@ -175,12 +175,12 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC(
Detection mAP evaluate operator.
The general steps are as follows. First, calculate the true positive and
false positive according to the input of detection and labels, then
calculate the mAP evaluate value.
Supporting '11 point' and 'integral' mAP algorithm. Please get more information
from the following articles:
https://sanchom.wordpress.com/tag/average-precision/
https://arxiv.org/abs/1512.02325
false positive according to the input of detection and labels, then
calculate the mAP evaluate value.
Supporting '11 point' and 'integral' mAP algorithm. Please get more information
from the following articles:
https://sanchom.wordpress.com/tag/average-precision/
https://arxiv.org/abs/1512.02325
)DOC");
}
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include "paddle/fluid/operators/mean_op.h"
namespace paddle {
namespace operators {
using framework::DataLayout;
template <typename T>
class GaussianMKLDNNKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
float mean = context.Attr<float>("mean");
float std = context.Attr<float>("std");
auto* tensor = context.Output<framework::Tensor>("Out");
T* data = tensor->mutable_data<T>(context.GetPlace());
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
std::minstd_rand engine;
if (seed == 0) {
seed = std::random_device()();
}
engine.seed(seed);
std::normal_distribution<T> dist(mean, std);
int64_t size = tensor->numel();
for (int64_t i = 0; i < size; ++i) {
data[i] = dist(engine);
}
// The format of output is set as the mkldnn's format
// TODO(@mozga-intel) The format of matrix sets inside the another layers.
tensor->set_layout(DataLayout::kMKLDNN);
tensor->set_format(mkldnn::memory::format::oihw);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(gaussian_random, MKLDNN, ::paddle::platform::CPUPlace,
ops::GaussianMKLDNNKernel<float>);
......@@ -15,6 +15,10 @@ limitations under the License. */
#include <random>
#include "paddle/fluid/framework/op_registry.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle {
namespace operators {
......@@ -62,9 +66,20 @@ class GaussianRandomOp : public framework::OperatorWithKernel {
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
framework::LibraryType library{framework::LibraryType::kPlain};
framework::DataLayout layout{framework::DataLayout::kAnyLayout};
#ifdef PADDLE_WITH_MKLDNN
if (library == framework::LibraryType::kPlain &&
platform::CanMKLDNNBeUsed(ctx)) {
library = framework::LibraryType::kMKLDNN;
layout = framework::DataLayout::kMKLDNN;
}
#endif
return framework::OpKernelType(
static_cast<framework::proto::VarType::Type>(ctx.Attr<int>("dtype")),
ctx.device_context());
ctx.device_context(), layout, library);
}
};
......@@ -95,7 +110,9 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
"(int, default 5(FP32)) "
"Output data type.")
.SetDefault(framework::proto::VarType::FP32);
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(R"DOC(
GaussianRandom Operator.
......
......@@ -70,35 +70,40 @@ template <typename T>
class ConcatGradFunctor<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input, const int axis,
std::vector<framework::Tensor>* outputs) {
const framework::Tensor& input,
const std::vector<const framework::Tensor*>& ref_inputs,
const int axis, std::vector<framework::Tensor*>* outputs) {
// TODO(zcd): Add input data validity checking
int num = outputs->size();
size_t num = outputs->size();
int input_rows = 1;
auto dim_0 = outputs->at(0).dims();
auto dim_0 = ref_inputs[0]->dims();
for (int i = 0; i < axis; ++i) {
input_rows *= dim_0[i];
}
int input_cols = 0;
std::vector<int64_t> output_cols(outputs->size());
for (int i = 0; i < num; ++i) {
int t_cols = outputs->at(i).numel() / input_rows;
for (size_t i = 0; i < num; ++i) {
int t_cols = ref_inputs[i]->numel() / input_rows;
input_cols += t_cols;
output_cols[i] = t_cols;
}
auto cpu_place = boost::get<platform::CPUPlace>(context.GetPlace());
// computation
for (int k = 0; k < input_rows; ++k) {
for (size_t k = 0; k < input_rows; ++k) {
const T* src_ptr = input.data<T>() + k * input_cols;
int col_idx = 0;
for (int j = 0; j < num; ++j) {
int col_len = output_cols[j];
T* dst_ptr = outputs->at(j).data<T>() + k * col_len;
memory::Copy(cpu_place, dst_ptr, cpu_place, src_ptr + col_idx,
sizeof(T) * col_len);
auto* out_tensor = outputs->at(j);
if (out_tensor != nullptr) {
T* dst_ptr = out_tensor->data<T>() + k * col_len;
memory::Copy(cpu_place, dst_ptr, cpu_place, src_ptr + col_idx,
sizeof(T) * col_len);
}
col_idx += col_len;
}
}
......
......@@ -22,43 +22,24 @@ namespace paddle {
namespace operators {
namespace math {
template <typename T>
__device__ T upper_bound(const T* first, T count, T val) {
const T* orig = first;
const T* it = nullptr;
T step = 0;
while (count > 0) {
it = first;
step = count / 2;
it += step;
if (!(val < *it)) {
first = ++it;
count -= step + 1;
} else {
count = step;
}
}
return first - orig;
}
template <typename T>
__global__ void KernelConcat(T** inputs, const int* input_cols, int col_size,
const int output_rows, const int output_cols,
T* output) {
int tid_x = blockIdx.x * blockDim.x + threadIdx.x;
int segment = upper_bound<int>(input_cols, col_size, tid_x) - 1;
int curr_offset = input_cols[segment];
int curr_segment = segment;
int curr_segment = 0;
int curr_offset = input_cols[0];
for (; tid_x < output_cols; tid_x += blockDim.x * gridDim.x) {
T curr_col_offset;
while ((curr_col_offset = input_cols[curr_segment + 1]) <= tid_x) {
int curr_col_offset = input_cols[curr_segment + 1];
while (curr_col_offset <= tid_x) {
curr_offset = curr_col_offset;
++curr_segment;
curr_col_offset = input_cols[curr_segment + 1];
}
int local_col = tid_x - curr_offset;
int segment_width = curr_col_offset - curr_offset;
T* input_ptr = inputs[curr_segment];
int tid_y = blockIdx.y * blockDim.y + threadIdx.y;
for (; tid_y < output_rows; tid_y += blockDim.y * gridDim.y)
......@@ -89,23 +70,25 @@ __global__ void KernelConcatGrad(const T* input_data, const int in_row,
const int in_col, const int* out_cols,
int out_cols_size, T** outputs_data) {
int tid_x = blockIdx.x * blockDim.x + threadIdx.x;
int segment = upper_bound<int>(out_cols, out_cols_size, tid_x) - 1;
int curr_offset = out_cols[segment];
int curr_segment = segment;
int curr_segment = 0;
int curr_offset = out_cols[0];
for (; tid_x < in_col; tid_x += blockDim.x * gridDim.x) {
T curr_col_offset;
while ((curr_col_offset = out_cols[curr_segment + 1]) <= tid_x) {
int curr_col_offset = out_cols[curr_segment + 1];
while (curr_col_offset <= tid_x) {
curr_offset = curr_col_offset;
++curr_segment;
curr_col_offset = out_cols[curr_segment + 1];
}
int local_col = tid_x - curr_offset;
int segment_width = curr_col_offset - curr_offset;
T* output_ptr = outputs_data[curr_segment];
int tid_y = blockIdx.y * blockDim.y + threadIdx.y;
for (; tid_y < in_row; tid_y += blockDim.y * gridDim.y)
output_ptr[tid_y * segment_width + local_col] =
input_data[tid_y * in_col + tid_x];
if (output_ptr != nullptr) {
int tid_y = blockIdx.y * blockDim.y + threadIdx.y;
for (; tid_y < in_row; tid_y += blockDim.y * gridDim.y)
output_ptr[tid_y * segment_width + local_col] =
input_data[tid_y * in_col + tid_x];
}
}
}
......@@ -118,10 +101,12 @@ __global__ void KernelConcatGrad(const T* input_data, const int in_row,
int split = tid_x / fixed_out_col;
int in_offset = tid_x - split * fixed_out_col;
T* output_ptr = outputs_data[split];
int tid_y = blockIdx.y * blockDim.y + threadIdx.y;
for (; tid_y < in_row; tid_y += blockDim.y * gridDim.y)
output_ptr[tid_y * fixed_out_col + in_offset] =
input_data[tid_y * in_col + tid_x];
if (output_ptr != nullptr) {
int tid_y = blockIdx.y * blockDim.y + threadIdx.y;
for (; tid_y < in_row; tid_y += blockDim.y * gridDim.y)
output_ptr[tid_y * fixed_out_col + in_offset] =
input_data[tid_y * in_col + tid_x];
}
}
}
......@@ -203,17 +188,18 @@ template <typename T>
class ConcatGradFunctor<platform::CUDADeviceContext, T> {
public:
void operator()(const platform::CUDADeviceContext& context,
const framework::Tensor& input, const int axis,
std::vector<framework::Tensor>* outputs) {
const framework::Tensor& input,
const std::vector<const framework::Tensor*>& ref_inputs,
const int axis, std::vector<framework::Tensor*>* outputs) {
// TODO(zcd): Add input data validity checking
int o_num = outputs->size();
int out_row = 1;
auto dim_0 = outputs->at(0).dims();
auto dim_0 = ref_inputs[0]->dims();
for (int i = 0; i < axis; ++i) {
out_row *= dim_0[i];
}
int out_col = outputs->at(0).numel() / out_row;
int out0_col = ref_inputs[0]->numel() / out_row;
int in_col = 0, in_row = out_row;
bool sameShape = true;
......@@ -223,13 +209,17 @@ class ConcatGradFunctor<platform::CUDADeviceContext, T> {
outputs_cols[0] = 0;
for (int i = 0; i < o_num; ++i) {
int t_col = outputs->at(i).numel() / out_row;
int t_col = ref_inputs.at(i)->numel() / out_row;
if (sameShape) {
if (t_col != out_col) sameShape = false;
if (t_col != out0_col) sameShape = false;
}
in_col += t_col;
outputs_cols[i + 1] = in_col;
outputs_ptr[i] = outputs->at(i).data<T>();
if (outputs->at(i) != nullptr) {
outputs_ptr[i] = outputs->at(i)->data<T>();
} else {
outputs_ptr[i] = nullptr;
}
}
T** dev_out_gpu_data =
......@@ -255,7 +245,7 @@ class ConcatGradFunctor<platform::CUDADeviceContext, T> {
if (sameShape) {
KernelConcatGrad<<<grid_size, block_size, 0, context.stream()>>>(
input.data<T>(), in_row, in_col, out_col, dev_out_gpu_data);
input.data<T>(), in_row, in_col, out0_col, dev_out_gpu_data);
} else {
const int* dev_outs_col_data = outputs_cols.CUDAData(context.GetPlace());
KernelConcatGrad<<<grid_size, block_size, 0, context.stream()>>>(
......
......@@ -57,7 +57,8 @@ template <typename DeviceContext, typename T>
class ConcatGradFunctor {
public:
void operator()(const DeviceContext& context, const framework::Tensor& input,
const int axis, std::vector<framework::Tensor>* outputs);
const std::vector<const framework::Tensor*>& ref_inputs,
const int axis, std::vector<framework::Tensor*>* outputs);
};
} // namespace math
......
......@@ -295,7 +295,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}},
framework::AttributeMap{});
framework::AttributeMap{{"use_mkldnn", {false}}});
VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]);
sum_op->Run(*sub_scopes[0], places[0]);
WaitOnPlace(places[0]);
......
......@@ -429,7 +429,8 @@ class RecurrentGradOp : public RecurrentBase {
auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {pg_names[param_id], new_inside_name}}},
{{"Out", {pg_names[param_id]}}}, framework::AttributeMap{});
{{"Out", {pg_names[param_id]}}},
framework::AttributeMap{{"use_mkldnn", {false}}});
sum_op->Run(cur_scope, place);
cur_scope.Rename(new_inside_name, inside_grad_name);
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*Licensed under the Apache License, Version 2.0(the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "mkldnn.hpp"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/operators/sum_op.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace operators {
using paddle::framework::Tensor;
using paddle::platform::MKLDNNDeviceContext;
using paddle::platform::CPUDeviceContext;
using framework::DataLayout;
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::stream;
using mkldnn::sum;
using mkldnn::reorder;
using platform::to_void_cast;
template <typename T>
class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");
auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
auto in_vars = ctx.MultiInputVar("X");
const int N = in_vars.size();
auto out_var = ctx.OutputVar("Out");
bool in_place = out_var == in_vars[0];
if (out_var->IsType<framework::LoDTensor>()) {
LoDTensor* output = ctx.Output<LoDTensor>("Out");
T* output_data = output->mutable_data<T>(ctx.GetPlace());
std::vector<int> dst_tz = framework::vectorize2int(output->dims());
auto src_tz = dst_tz;
memory::format output_format{memory::format::format_undef};
std::vector<float> scales;
std::vector<memory::primitive_desc> srcs_mpd;
std::vector<mkldnn::memory> srcs_mem;
PADDLE_ENFORCE(in_vars[0]->IsType<LoDTensor>(),
"Input[0] must be LoDTensors");
auto& input0 = in_vars[0]->Get<LoDTensor>();
PADDLE_ENFORCE(input0.layout() == DataLayout::kMKLDNN &&
input0.format() != memory::format::format_undef,
"Wrong layout/format for inputs[0]");
memory::format input_format = input0.format();
if (src_tz.size() == 1 && (input_format == memory::format::nchw ||
input_format == memory::format::nhwc)) {
input_format = memory::format::x;
}
if (src_tz.size() == 2 && (input_format == memory::format::nchw ||
input_format == memory::format::nhwc)) {
input_format = memory::format::nc;
}
for (int i = in_place ? 1 : 0; i < N; i++) {
PADDLE_ENFORCE(in_vars[i]->IsType<LoDTensor>(),
"all inputs must be all LoDTensors");
auto& input = in_vars[i]->Get<LoDTensor>();
PADDLE_ENFORCE(input.layout() == DataLayout::kMKLDNN &&
input.format() != memory::format::format_undef,
"Wrong layout/format for inputs");
if (input.numel() == 0) {
continue;
}
const T* input_data = input.data<T>();
auto src_md =
memory::desc(src_tz, memory::data_type::f32, input_format);
auto src_mpd = memory::primitive_desc(src_md, mkldnn_engine);
auto src_mem = memory(src_mpd, to_void_cast(input_data));
srcs_mpd.push_back(src_mpd);
srcs_mem.push_back(src_mem);
scales.push_back(1.0);
}
auto dst_md =
memory::desc(dst_tz, memory::data_type::f32, memory::format::any);
auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd);
std::shared_ptr<memory> dst_mem;
if (in_place) {
dst_mem.reset(new memory(sum_pd.dst_primitive_desc()));
} else {
dst_mem.reset(new memory(sum_pd.dst_primitive_desc(), output_data));
}
std::vector<mkldnn::primitive::at> inputs;
for (size_t i = 0; i < srcs_mem.size(); ++i) {
inputs.push_back(srcs_mem[i]);
}
auto sum_prim = mkldnn::sum(sum_pd, inputs, *dst_mem);
output_format = (memory::format)platform::GetMKLDNNFormat(sum_pd);
primitive reorder_prim;
std::shared_ptr<memory> target_mem;
if (in_place) {
output_format = input_format;
target_mem.reset(new memory(
{{{src_tz}, memory::data_type::f32, output_format}, mkldnn_engine},
output_data));
reorder_prim = reorder(*dst_mem, *target_mem);
}
std::vector<primitive> pipeline;
pipeline.push_back(sum_prim);
if (in_place) pipeline.push_back(reorder_prim);
stream(stream::kind::eager).submit(pipeline).wait();
output->set_layout(DataLayout::kMKLDNN);
output->set_format(output_format);
} else if (out_var->IsType<framework::SelectedRows>()) {
// TODO(@mozga-intel) Add MKLDNN SelectedRows support
std::unique_ptr<framework::SelectedRows> in0;
if (in_place) {
// If is in_place, we store the input[0] to in0
auto& in_sel0 = in_vars[0]->Get<SelectedRows>();
auto& rows = in_sel0.rows();
in0.reset(new framework::SelectedRows(rows, in_sel0.height()));
in0->mutable_value()->ShareDataWith(in_sel0.value());
}
auto get_selected_row = [&](size_t i) -> const SelectedRows& {
if (i == 0 && in0) {
return *in0.get();
} else {
return in_vars[i]->Get<SelectedRows>();
}
};
auto* out = ctx.Output<SelectedRows>("Out");
out->mutable_rows()->clear();
auto* out_value = out->mutable_value();
// Runtime InferShape
size_t first_dim = 0;
for (int i = 0; i < N; i++) {
auto& sel_row = get_selected_row(i);
first_dim += sel_row.rows().size();
}
auto in_dim =
framework::vectorize(get_selected_row(N - 1).value().dims());
in_dim[0] = static_cast<int64_t>(first_dim);
out_value->Resize(framework::make_ddim(in_dim));
// if all the input sparse vars are empty, no need to
// merge these vars.
if (first_dim == 0UL) {
return;
}
out_value->mutable_data<T>(ctx.GetPlace());
math::SelectedRowsAddTo<CPUDeviceContext, T> functor;
int64_t offset = 0;
for (int i = 0; i < N; i++) {
auto& sel_row = get_selected_row(i);
if (sel_row.rows().size() == 0) {
continue;
}
PADDLE_ENFORCE_EQ(out->height(), sel_row.height());
functor(ctx.template device_context<CPUDeviceContext>(), sel_row,
offset, out);
offset += sel_row.value().numel();
}
} else if (out_var->IsType<framework::LoDTensorArray>()) {
// TODO(@mozga-intel) Add MKLDNN LoDTensorArray support
auto& out_array = *out_var->GetMutable<framework::LoDTensorArray>();
for (size_t i = in_place ? 1 : 0; i < in_vars.size(); ++i) {
PADDLE_ENFORCE(in_vars[i]->IsType<framework::LoDTensorArray>(),
"Only support all inputs are TensorArray");
auto& in_array = in_vars[i]->Get<framework::LoDTensorArray>();
for (size_t i = 0; i < in_array.size(); ++i) {
if (in_array[i].numel() != 0) {
if (i >= out_array.size()) {
out_array.resize(i + 1);
}
if (out_array[i].numel() == 0) {
framework::TensorCopy(in_array[i], in_array[i].place(),
ctx.device_context(), &out_array[i]);
out_array[i].set_lod(in_array[i].lod());
} else {
PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod());
auto in = EigenVector<T>::Flatten(in_array[i]);
auto result = EigenVector<T>::Flatten(out_array[i]);
result.device(*ctx.template device_context<MKLDNNDeviceContext>()
.eigen_device()) = result + in;
}
}
}
}
} else {
PADDLE_THROW("Unexpected branch, output variable type is %s",
out_var->Type().name());
}
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP_KERNEL(sum, MKLDNN, ::paddle::platform::CPUPlace,
paddle::operators::SumMKLDNNOpKernel<float>);
......@@ -18,6 +18,10 @@ limitations under the License. */
#include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle {
namespace operators {
using framework::Tensor;
......@@ -63,6 +67,18 @@ class SumOp : public framework::OperatorWithKernel {
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto x_vars = ctx.MultiInputVar("X");
framework::LibraryType library{framework::LibraryType::kPlain};
framework::DataLayout layout{framework::DataLayout::kAnyLayout};
#ifdef PADDLE_WITH_MKLDNN
if (library == framework::LibraryType::kPlain &&
platform::CanMKLDNNBeUsed(ctx)) {
library = framework::LibraryType::kMKLDNN;
layout = framework::DataLayout::kMKLDNN;
}
#endif
if (x_vars[0]->IsType<framework::LoDTensor>()) {
int dtype = -1;
for (auto& x_var : x_vars) {
......@@ -80,26 +96,27 @@ class SumOp : public framework::OperatorWithKernel {
"Sum operator should have at least one tensor");
return framework::OpKernelType(
static_cast<framework::proto::VarType::Type>(dtype),
ctx.device_context());
static_cast<framework::proto::VarType::Type>(dtype), ctx.GetPlace(),
layout, library);
} else if (x_vars[0]->IsType<framework::SelectedRows>()) {
for (auto& var : x_vars) {
auto& value = var->Get<framework::SelectedRows>().value();
if (value.IsInitialized()) {
return framework::OpKernelType(framework::ToDataType(value.type()),
ctx.device_context());
ctx.device_context(), layout, library);
}
}
// if input sparse vars are not initialized, use an default kernel type.
return framework::OpKernelType(framework::proto::VarType::FP32,
ctx.device_context());
ctx.device_context(), layout, library);
} else if (x_vars[0]->IsType<framework::LoDTensorArray>()) {
for (auto& x_var : x_vars) {
auto& array = x_var->Get<framework::LoDTensorArray>();
for (auto& each : array) {
if (each.numel() != 0) {
return framework::OpKernelType(framework::ToDataType(each.type()),
ctx.device_context());
ctx.device_context(), layout,
library);
}
}
}
......@@ -116,6 +133,9 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "(vector<Tensor>) The input tensors of sum operator.")
.AsDuplicable();
AddOutput("Out", "(Tensor) The output tensor of sum operator.").Reuse("X");
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(R"DOC(
Sum operator.
......@@ -132,7 +152,6 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
framework::BlockDesc* block) const override {
auto& inputs = op_desc.Input("X");
auto var_type = framework::proto::VarType::SELECTED_ROWS;
for (auto& name : op_desc.Input("X")) {
VLOG(10) << name << " "
<< block->FindRecursiveOrCreateVar(name).GetType();
......@@ -206,6 +225,7 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR(sum, ops::SumOp, ops::SumOpMaker, ops::SumGradMaker,
ops::SumOpVarTypeInference);
REGISTER_OP_CPU_KERNEL(
sum, ops::SumKernel<paddle::platform::CPUDeviceContext, float>,
ops::SumKernel<paddle::platform::CPUDeviceContext, double>,
......
......@@ -240,7 +240,7 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
}
// Test with a larger FC layer.
TEST(TensorRTEngineOp, fc) { Execute(40, 256, 256); }
TEST(TensorRTEngineOp, fc) { Execute(40, 28, 28); }
} // namespace operators
} // namespace paddle
......
......@@ -203,11 +203,11 @@ class WhileGradOp : public framework::OperatorBase {
->set_lod(inside_tensor.lod());
}
}
auto new_inside_name = cur_scope.Rename(inside_grad_name);
auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {pg_names[param_id], new_inside_name}}},
{{"Out", {pg_names[param_id]}}}, framework::AttributeMap{});
{{"Out", {pg_names[param_id]}}},
framework::AttributeMap{{"use_mkldnn", {false}}});
sum_op->Run(cur_scope, dev_place);
cur_scope.Rename(new_inside_name, inside_grad_name);
}
......
......@@ -28,9 +28,15 @@ DEFINE_double(fraction_of_cpu_memory_to_use, 1,
"Default use 100% of CPU memory for PaddlePaddle,"
"reserve the rest for page tables, etc");
DEFINE_uint64(
initial_cpu_memory_in_mb, 500,
"Default initial 500MB of CPU memory for PaddlePaddle, in MD unit.");
DEFINE_uint64(initial_cpu_memory_in_mb,
#ifdef PADDLE_WITH_MKLDNN
/* Aligned with mozga-intel, MKLDNN need at least 5000 MB
* to obtain the best performance*/
5000,
#else
500,
#endif
"Initial CPU memory for PaddlePaddle, in MD unit.");
DEFINE_double(
fraction_of_cuda_pinned_memory_to_use, 0.5,
......@@ -59,10 +65,7 @@ inline size_t CpuTotalPhysicalMemory() {
size_t CpuMaxAllocSize() {
// For distributed systems, it requires configuring and limiting
// the fraction of memory to use.
return std::min(
static_cast<size_t>(FLAGS_fraction_of_cpu_memory_to_use *
CpuTotalPhysicalMemory()),
static_cast<size_t>(FLAGS_initial_cpu_memory_in_mb * 1 << 20));
return FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory();
}
size_t CpuMinChunkSize() {
......@@ -71,8 +74,11 @@ size_t CpuMinChunkSize() {
}
size_t CpuMaxChunkSize() {
// Allow to allocate the maximum chunk size is roughly 3% of CPU memory.
return CpuMaxAllocSize() / 32;
// Allow to allocate the maximum chunk size is roughly 3% of CPU memory,
// or the initial_cpu_memory_in_mb.
return std::min(
static_cast<size_t>(CpuMaxAllocSize() / 32),
static_cast<size_t>(FLAGS_initial_cpu_memory_in_mb * 1 << 20));
}
size_t CUDAPinnedMaxAllocSize() {
......
......@@ -99,5 +99,11 @@ inline mkldnn::memory::format GetMKLDNNFormat(const mkldnn::memory memory) {
memory.get_primitive_desc().desc().data.format);
}
inline mkldnn::memory::format GetMKLDNNFormat(
const mkldnn::sum::primitive_desc& memory) {
return static_cast<mkldnn::memory::format>(
memory.dst_primitive_desc().desc().data.format);
}
} // namespace platform
} // namespace paddle
......@@ -30,8 +30,9 @@ int main(int argc, char** argv) {
new_argv.push_back(
strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory"));
#else
new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,use_mkldnn"));
new_argv.push_back(strdup("--undefok=use_mkldnn"));
new_argv.push_back(strdup(
"--tryfromenv=use_pinned_memory,use_mkldnn,initial_cpu_memory_in_mb"));
new_argv.push_back(strdup("--undefok=use_mkldnn,initial_cpu_memory_in_mb"));
#endif
int new_argc = static_cast<int>(new_argv.size());
char** new_argv_address = new_argv.data();
......
......@@ -118,7 +118,7 @@ def __bootstrap__():
read_env_flags = [
'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
'eager_delete_scope', 'use_mkldnn'
'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb'
]
if core.is_compiled_with_cuda():
read_env_flags += [
......
......@@ -36,6 +36,25 @@ def _is_number_or_matrix_(var):
class WeightedAverage(object):
"""
Calculate weighted average.
The average calculating is accomplished via Python totally.
They do not change Paddle's Program, nor do anything to
modify NN model's configuration. They are completely
wrappers of Python functions.
Examples:
.. code-block:: python
avg = fluid.average.WeightedAverage()
avg.add(value=2.0, weight=1)
avg.add(value=4.0, weight=2)
avg.eval()
# The result is 3.333333333.
# For (2.0 * 1 + 4.0 * 2) / (1 + 2) = 3.333333333
"""
def __init__(self):
warnings.warn(
"The %s is deprecated, please use fluid.metrics.Accuracy instead." %
......
......@@ -132,9 +132,9 @@ def _addup_repetitive_outputs_(op_descs):
for idx, op_desc in enumerate(op_descs):
for var_name in op_desc.input_arg_names():
if len(renamed_vars[var_name]) > 1:
pending_sum_ops.append(
(_create_op_desc_("sum", {"X": renamed_vars[var_name]},
{"Out": [var_name]}, {}), idx))
pending_sum_ops.append((_create_op_desc_(
"sum", {"X": renamed_vars[var_name]}, {"Out": [var_name]},
{"use_mkldnn": False}), idx))
renamed_vars[var_name] = [var_name]
for var_name in op_desc.output_arg_names():
if var_name == core.empty_var_name(
......@@ -147,7 +147,7 @@ def _addup_repetitive_outputs_(op_descs):
else:
if len(renamed_vars[var_name]) == 1:
new_name = var_name + "@RENAME@" + \
str(var_rename_count[var_name])
str(var_rename_count[var_name])
var_rename_count[var_name] += 1
# rename original var_name
renamed_vars[var_name][0] = new_name
......@@ -155,14 +155,15 @@ def _addup_repetitive_outputs_(op_descs):
_rename_arg_(pending_sum_ops, var_name, new_name)
new_name = var_name + "@RENAME@" + \
str(var_rename_count[var_name])
str(var_rename_count[var_name])
var_rename_count[var_name] += 1
op_desc.rename_output(var_name, new_name)
renamed_vars[var_name].append(new_name)
for var_name, inputs in renamed_vars.iteritems():
if len(inputs) > 1:
pending_sum_ops.append((_create_op_desc_(
"sum", {"X": inputs}, {"Out": [var_name]}, {}), len(op_descs)))
pending_sum_ops.append(
(_create_op_desc_("sum", {"X": inputs}, {"Out": [var_name]},
{"use_mkldnn": False}), len(op_descs)))
# sum_op descs are sorted according to their insert position
for p in reversed(pending_sum_ops):
op_descs.insert(p[1], p[0])
......@@ -434,18 +435,65 @@ def _get_stop_gradients_(program):
def append_backward(loss, parameter_list=None, no_grad_set=None,
callbacks=None):
"""
Append backward part to main_program
Append backward part to main_program.
Args:
loss(Variable): The variable generated by cost function.
parameter_list(list[string]): Parameters that need to be updated by
optimizer. If None, it means all parameters need to be updated.
no_grad_set(set): Variables that have no gradients in Block 0.
All variables with `step_gradient=True` from all blocks will be
automatically added.
A complete neural network training is made up of forward and backward
propagation. However, when we configure a network, we only need to
specify its forwrd part. The backward part is generated automatically
according to the forward part by this function.
Return:
(list[(Variable,Variable)]): list of (parameter, gradient) pair.
In most cases, users do not need to invoke this function manually. It
will be automatically invoked by the optimizer's `minimize` function.
Args:
loss(Variable): The loss variable of the network.
parameter_list(list[string]|None): Names of parameters that need
to be updated by optimizers.
If it is None, all parameters
will be updated.
Default: None
no_grad_set(set|None): Variables in the Block 0 whose gradients
should be ignored. All variables with
`step_gradient=True` from all blocks will
be automatically added into this set.
Default: None
callbacks(list[callable object]|None): The callbacks are used for
doing some custom jobs during
backward part building. All
callable objects in it will
be invoked once each time a
new gradient operator is added
into the program. The callable
object must has two input
parameters: 'block' and 'context'.
The 'block' is the block which
the new gradient operator will
be added to. The 'context' is a
map, whose keys are gradient
variable names and values are
corresponding original variables.
In addition to this, the 'context'
has another special key-value pair:
the key is string '__current_op_desc__'
and the value is the op_desc of the
gradient operator who has just
triggered the callable object.
Returns:
list[(Variable,Variable)]: Pairs of parameter and its
corresponding gradients. The key is the parameter and the
value is gradient variable.
Raises:
AssertionError: If `loss` is not an instance of Variable.
Examples:
.. code-block:: python
# network configuration code
# ...
avg_loss = fluid.layers.mean(loss)
param_grad_list = fluid.backward.append_backward(loss=avg_loss)
"""
assert isinstance(loss, framework.Variable)
......
......@@ -24,8 +24,6 @@ __all__ = [
'GradientClipByValue',
'GradientClipByNorm',
'GradientClipByGlobalNorm',
'append_gradient_clip_ops',
'error_clip_callback',
]
......@@ -38,6 +36,25 @@ class BaseErrorClipAttr(object):
class ErrorClipByValue(BaseErrorClipAttr):
"""
Clips tensor values to the range [min, max].
Given a tensor t, this operation clips its value to min and max inplace.
- Any values less than min are set to min.
- Any values greater than max are set to max.
Args:
max (float): The maximum value to clip by.
min (float, optional): The minimum value to clip by. if not set by user, \
will be set to -max by framework.
Examples:
.. code-block:: python
var = fluid.framework.Variable(..., error_clip=ErrorClipByValue(max=5.0), ...)
"""
def __init__(self, max, min=None):
max = float(max)
if min is None:
......@@ -99,6 +116,31 @@ class NullGradientClipAttr(BaseGradientClipAttr):
class GradientClipByValue(BaseGradientClipAttr):
"""
Clips gradient values to the range [min, max].
Given a tensor t, this operation clips its value to min and max inplace.
- Any values less than min are set to min.
- Any values greater than max are set to max.
Args:
max (float): The maximum value to clip by.
min (float, optional): The minimum value to clip by. if not set by user, \
will be set to -max by framework.
Examples:
.. code-block:: python
w_param_attrs = ParamAttr(name=None,
initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
learning_rate=1.0,
regularizer=L1Decay(1.0),
trainable=True,
clip=GradientClipByValue(-1.0, 1.0))
y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
"""
def __init__(self, max, min=None):
max = float(max)
if min is None:
......@@ -120,6 +162,37 @@ class GradientClipByValue(BaseGradientClipAttr):
class GradientClipByNorm(BaseGradientClipAttr):
"""
Clips tensor values to a maximum L2-norm.
This operator limits the L2 norm of the input :math:`X` within :math:`max\_norm`.
If the L2 norm of :math:`X` is less than or equal to :math:`max\_norm`, :math:`Out`
will be the same as :math:`X`. If the L2 norm of :math:`X` is greater than
:math:`max\_norm`, :math:`X` will be linearly scaled to make the L2 norm of
:math:`Out` equal to :math:`max\_norm`, as shown in the following formula:
.. math::
Out = \\frac{max\_norm * X}{norm(X)},
where :math:`norm(X)` represents the L2 norm of :math:`X`.
Args:
clip_norm (float): The maximum norm value
Examples:
.. code-block:: python
w_param_attrs = ParamAttr(name=None,
initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
learning_rate=1.0,
regularizer=L1Decay(1.0),
trainable=True,
clip=GradientClipByNorm(clip_norm=2.0))
y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
"""
def __init__(self, clip_norm):
self.clip_norm = clip_norm
......@@ -135,6 +208,44 @@ class GradientClipByNorm(BaseGradientClipAttr):
class GradientClipByGlobalNorm(BaseGradientClipAttr):
"""
Clips values of multiple tensors by the ratio of the sum of their norms.
Given a list of tensors t_list, and a clipping ratio clip_norm, this
operation returns a list of clipped tensors list_clipped and the global
norm (global_norm) of all tensors in t_list.
To perform the clipping, the values :math:`t\_list[i]` are set to:
.. math::
t\_list[i] = t\_list[i] * \\frac{clip\_norm}{\max(global\_norm, clip\_norm)}
where:
.. math::
global\_norm = \sqrt{\sum_{i=0}^{N-1}(l2norm(t\_list[i]))^2}
If :math:`clip\_norm > global\_norm` then the entries in t_list remain as they are,
otherwise they're all shrunk by the global ratio.
Args:
clip_norm (float): The maximum norm value
group_name (str, optional): The group name for this clip.
Examples:
.. code-block:: python
p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)
with fluid.program_guard(main_program=prog_clip):
fluid.clip.set_gradient_clip(
fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0))
p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)
"""
def __init__(self, clip_norm, group_name="default_group"):
if not isinstance(group_name, basestring):
raise TypeError("'group_name' must be a basestring.")
......@@ -183,15 +294,16 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
def set_gradient_clip(clip, param_list=None, program=None):
"""
To specify parameters that require gradient clip.
Args:
clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
which describes the type and detailed attributes of required gradient clip.
param_list(list, None by default): Parameters that require gradient clip.
It can be a list of parameter or a list of parameter's name.
When it's None, all parameters in the program will be included.
program(Program, None by default): The program where parameters are.
Will be the default main program when assigned with None.
To specify parameters that require gradient clip.
Args:
clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
which describes the type and detailed attributes of required gradient clip.
param_list(list(Variable)): Parameters that require gradient clip.
It can be a list of parameter or a list of parameter's name.
When it's None, all parameters in the program will be included.
program(Program): The program where parameters are.
Will be the default main program when assigned with None.
"""
if not isinstance(clip, BaseGradientClipAttr):
raise TypeError(
......
......@@ -29,6 +29,13 @@ class DataToLoDTensorConverter(object):
self.place = place
self.lod_level = lod_level
self.shape = shape
negtive_count = 0
for s in self.shape:
if s < 0:
negtive_count += 1
if negtive_count > 1:
self.shape = None
break
if dtype == core.VarDesc.VarType.FP32:
self.dtype = 'float32'
elif dtype == core.VarDesc.VarType.INT64:
......@@ -61,7 +68,9 @@ class DataToLoDTensorConverter(object):
self._feed_impl_(each_data, lod[1:], lod_level - 1)
def done(self):
arr = numpy.array(self.data, dtype=self.dtype).reshape(self.shape)
arr = numpy.array(self.data, dtype=self.dtype)
if self.shape:
arr = arr.reshape(self.shape)
t = core.LoDTensor()
t.set(arr, self.place)
if self.lod_level > 0:
......
......@@ -642,7 +642,13 @@ class Operator(object):
def set_attr(self, name, val):
self.attrs[name] = val
self.desc.set_attr(name, val)
if isinstance(val, Block):
self.desc.set_block_attr(name, val.desc)
elif isinstance(val, core.BlockDesc) or \
isinstance(val, core.ProgramDesc):
self.desc.set_serialized_attr(name, val.serialize_to_string())
else:
self.desc.set_attr(name, val)
@property
def attr_names(self):
......
......@@ -27,13 +27,30 @@ __all__ = ['Inferencer', ]
class Inferencer(object):
"""
Inferencer High Level API.
Args:
infer_func (Python func): Infer function that will return predict Variable
param_path (str): The path where the inference model is saved by fluid.io.save_params
place (Place): place to do the inference
parallel (bool): use parallel_executor to run the inference, it will use multi CPU/GPU.
Examples:
.. code-block:: python
def inference_program():
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
return y_predict
place = fluid.CPUPlace()
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path="/tmp/model", place=place)
"""
def __init__(self, infer_func, param_path, place=None, parallel=False):
"""
:param infer_func: a function that will return predict Variable
:param param_path: the path where the inference model is saved by fluid.io.save_params
:param place: place to do the inference
:param parallel: use parallel_executor to run the inference, it will use multi CPU/GPU.
"""
self.param_path = param_path
self.scope = core.Scope()
self.parallel = parallel
......@@ -60,9 +77,20 @@ class Inferencer(object):
def infer(self, inputs, return_numpy=True):
"""
:param inputs: a map of {"input_name": input_var} that will be feed into the inference program
to get the predict value
:return: the predict value of the inference model
Do Inference for Inputs
Args:
inputs (map): a map of {"input_name": input_var} that will be feed into the inference program
return_numpy (bool): transform return value into numpy or not
Returns:
Tensor or Numpy: the predict value of the inference model for the inputs
Examples:
.. code-block:: python
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
results = inferencer.infer({'x': tensor_x})
"""
if not isinstance(inputs, dict):
raise ValueError(
......
......@@ -19,26 +19,39 @@ from framework import convert_np_dtype_to_dtype_
from core import VarDesc
__all__ = [
'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'force_init_on_cpu',
'init_on_cpu', 'ConstantInitializer', 'UniformInitializer',
'NormalInitializer', 'XavierInitializer', 'BilinearInitializer'
'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'MSRA',
'force_init_on_cpu', 'init_on_cpu', 'ConstantInitializer',
'UniformInitializer', 'NormalInitializer', 'XavierInitializer',
'BilinearInitializer', 'MSRAInitializer'
]
_force_init_on_cpu_ = False
def force_init_on_cpu():
"""
The flag of whether force to init variables on CPU.
Examples:
.. code-block:: python
if force_init_on_cpu():
pass
"""
return _force_init_on_cpu_
@contextlib.contextmanager
def init_on_cpu():
"""
Switch program with `with` statement
Force the variable to be inited on CPU.
Examples:
>>> with init_on_cpu():
>>> step = layers.create_global_var()
.. code-block:: python
with init_on_cpu():
step = layers.create_global_var()
"""
global _force_init_on_cpu_
......@@ -104,14 +117,18 @@ class Initializer(object):
class ConstantInitializer(Initializer):
"""Implements the constant initializer
Args:
value (float): constant value to initialize the variable
Examples:
.. code-block:: python
fc = fluid.layers.fc(input=x, size=10,
param_attr=fluid.initializer.Constant(value=2.0))
"""
def __init__(self, value=0.0, force_cpu=False):
"""Constructor for ConstantInitializer
Args:
value: constant value to initialize the variable
"""
assert value is not None
super(ConstantInitializer, self).__init__()
self._value = value
......@@ -146,16 +163,20 @@ class ConstantInitializer(Initializer):
class UniformInitializer(Initializer):
"""Implements the random uniform distribution initializer
Args:
low (float): lower boundary of the uniform distribution
high (float): upper boundary of the uniform distribution
seed (int): random seed
Examples:
.. code-block:: python
fc = fluid.layers.fc(input=x, size=10,
param_attr=fluid.initializer.Uniform(low=-0.5, high=0.5))
"""
def __init__(self, low=-1.0, high=1.0, seed=0):
"""Constructor for UniformInitializer
Args:
low: lower boundary of the uniform distribution
high: upper boundary of the uniform distribution
seed: random seed
"""
assert low is not None
assert high is not None
assert high >= low
......@@ -196,17 +217,21 @@ class UniformInitializer(Initializer):
class NormalInitializer(Initializer):
"""Implements the random Normal(Gaussian) distribution initializer
"""Implements the Random Normal(Gaussian) distribution initializer
Args:
loc (float): mean of the normal distribution
scale (float): standard deviation of the normal distribution
seed (int): random seed
Examples:
.. code-block:: python
fc = fluid.layers.fc(input=x, size=10,
param_attr=fluid.initializer.Normal(loc=0.0, scale=2.0))
"""
def __init__(self, loc=0.0, scale=1.0, seed=0):
"""Constructor for NormalInitializer
Args:
loc: mean of the normal distribution
scale: standard deviation of the normal distribution
seed: random seed
"""
assert loc is not None
assert scale is not None
assert seed is not None
......@@ -246,39 +271,49 @@ class NormalInitializer(Initializer):
class XavierInitializer(Initializer):
"""Implements the Xavier initializer
"""
This class implements the Xavier weight initializer from the paper
Understanding the difficulty of training deep feedforward neural
networks[1] by Xavier Glorot and Yoshua Bengio.
`Understanding the difficulty of training deep feedforward neural
networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
by Xavier Glorot and Yoshua Bengio.
This initializer is designed to keep the scale of the gradients
approximately same in all the layers. In case of Uniform distribution,
the range is [-x, x], where x = sqrt(6 / (fan_in + fan_out)).
the range is [-x, x], where
.. math::
x = \sqrt{\\frac{6.0}{fan\_in + fan\_out}}
In case of Normal distribution, the mean is 0 and the standard deviation
is sqrt(2/ (fan_in + fan_out)).
is
.. math::
\sqrt{\\frac{2.0}{fan\_in + fan\_out}}
Args:
uniform (bool): whether to use uniform or normal distribution
fan_in (float): fan_in for Xavier initialization. If None, it is
inferred from the variable.
fan_out (float): fan_out for Xavier initialization. If None, it is
inferred from the variable.
seed (int): random seed
Note:
It is recommended to set fan_in and fan_out to None for most cases.
Examples:
.. code-block:: python
fc = fluid.layers.fc(
input=queries, size=10,
param_attr=fluid.initializer.Xavier(uniform=False))
References:
[1] Understanding the difficulty of training deep feedforward neural
networks. International conference on artificial intelligence and
statistics.
(http://proceedings.mlr.press/v9/glorot10a.html)
"""
def __init__(self, uniform=True, fan_in=None, fan_out=None, seed=0):
"""Constructor for XavierInitializer
Args:
uniform: whether to use uniform or normal distribution
fan_in: fan_in for Xavier initialization. If None, it is
inferred from the variable.
fan_out: fan_out for Xavier initialization. If None, it is
inferred from the variable.
seed: random seed
Note: It is recommended to set fan_in and fan_out to None for
most cases.
"""
assert uniform is not None
assert seed is not None
super(XavierInitializer, self).__init__()
......@@ -342,30 +377,42 @@ class MSRAInitializer(Initializer):
"""Implements the MSRA initializer a.k.a. Kaiming Initializer
This class implements the weight initialization from the paper
Delving Deep into Rectifiers: Surpassing Human-Level Performance on
ImageNet Classification[1] by Kaiming He, Xiangyu Zhang, Shaoqing Ren
and Jian Sun. This is a robust initialization method that particularly
considers the rectifier nonlinearities. In case of Uniform distribution,
the range is [-x, x], where x = sqrt(6 / fan_in). In case of Normal
distribution, the mean is 0 and the standard deviation
is sqrt(2/ fan_in).
References:
[1] Delving Deep into Rectifiers: Surpassing Human-Level Performance
on ImageNet Classification
(https://arxiv.org/abs/1502.01852)
`Delving Deep into Rectifiers: Surpassing Human-Level Performance on
ImageNet Classification <https://arxiv.org/abs/1502.01852>`_
by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a
robust initialization method that particularly considers the rectifier
nonlinearities. In case of Uniform distribution, the range is [-x, x], where
.. math::
x = \sqrt{\\frac{6.0}{fan\_in}}
In case of Normal distribution, the mean is 0 and the standard deviation
is
.. math::
\sqrt{\\frac{2.0}{fan\_in}}
Args:
uniform (bool): whether to use uniform or normal distribution
fan_in (float): fan_in for MSRAInitializer. If None, it is\
inferred from the variable.
seed (int): random seed
Note:
It is recommended to set fan_in to None for most cases.
Examples:
.. code-block:: python
fc = fluid.layers.fc(
input=queries, size=10,
param_attr=fluid.initializer.MSRA(uniform=False))
"""
def __init__(self, uniform=True, fan_in=None, seed=0):
"""Constructor for MSRAInitializer
Args:
uniform: whether to use uniform or normal distribution
fan_in: fan_in for MSRAInitializer. If None, it is
inferred from the variable.
seed: random seed
Note: It is recommended to set fan_in to None for most cases.
"""
assert uniform is not None
assert seed is not None
......@@ -425,34 +472,37 @@ class MSRAInitializer(Initializer):
class BilinearInitializer(Initializer):
"""Implements the bilinear initializer.
"""
This initializer can be used in transposed convolution operator to
act as upsampling. Users can upsample a feature map with shape of
(B, C, H, W) by any integer factor. The usage is:
>>> factor = 2
>>> w_attr = ParamAttr(learning_rate=0., regularizer=L2Decay(0.),
>>> initializer=Bilinear())
>>> conv_up = fluid.layers.conv2d_transpose(
>>> input,
>>> num_filters=C,
>>> output_size=None,
>>> filter_size=2 * factor - factor % 2,
>>> padding=ceil((factor - 1) / 2.),
>>> stride=factor,
>>> groups=C,
>>> param_attr=w_attr,
>>> bias_attr=False)
Where, `num_filters=C` and `groups=C` means this is channel-wise tranposed
Examples:
.. code-block:: python
factor = 2
w_attr = ParamAttr(learning_rate=0., regularizer=L2Decay(0.),
initializer=Bilinear())
conv_up = fluid.layers.conv2d_transpose(
input,
num_filters=C,
output_size=None,
filter_size=2 * factor - factor % 2,
padding=ceil((factor - 1) / 2.),
stride=factor,
groups=C,
param_attr=w_attr,
bias_attr=False)
Where, `num_filters=C` and `groups=C` means this is channel-wise transposed
convolution. The filter shape will be (C, 1, K, K) where K is `filer_size`,
This initializer will set a (K, K) interpolation kernel for every channel
of the filter identically. The resulting shape of the output feature map
will be (B, C, factor * H, factor * W). Note that the learning rate and the
weight decay are set to 0 in order to keep coefficient values of bilinear
interpolation unchanged during training.
interpolation unchanged during training.
"""
def __init__(self):
......@@ -469,7 +519,7 @@ class BilinearInitializer(Initializer):
be added.
Returns:
the initialization op
Operator: the initialization op
Raises:
ValueError: If type of `var` and `block` is not right.
......
此差异已折叠。
......@@ -185,12 +185,14 @@ def Print(input,
Returns:
Variable: Output tensor, same data with input tensor.
Examples:
.. code-block:: python
value = some_layer(...)
Print(value, summarize=10,
message="The content of some_layer: ")
value = some_layer(...)
Print(value, summarize=10,
message="The content of some_layer: ")
'''
helper = LayerHelper('print', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype())
......@@ -1201,6 +1203,31 @@ class ConditionalBlockGuard(BlockGuard):
class ConditionalBlock(object):
'''
**ConditionalBlock**
ConditionalBlock is an operator that bind a block to a specific condition,
if the condition matches, the corresponding block will be executed.
Args:
inputs (Variable): bool conditions.
is_scalar_condition (bool): whether the branch is controled by a scalar.
name(str): name of this ConditionalBlock.
Examples:
.. code-block:: python
cond = layers.less_than(x=label, y=limit)
true_image, false_image = layers.split_lod_tensor(
input=image, mask=cond)
true_cond = layers.ConditionalBlock([true_image])
with true_cond.block():
...
with false_cond.block():
...
'''
def __init__(self, inputs, is_scalar_condition=False, name=None):
for each_input in inputs:
if not isinstance(each_input, Variable):
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -194,16 +194,16 @@ def train(word_dict,
if is_local:
train_loop(fluid.default_main_program())
else:
port = os.getenv("PADDLE_INIT_PORT", "6174")
pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip...
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("TRAINERS"))
trainers = int(os.getenv("PADDLE_TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
training_role = os.getenv("TRAINING_ROLE", "TRAINER")
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER":
......
......@@ -401,6 +401,15 @@ class TestBook(unittest.TestCase):
self.assertIsNotNone(output)
print(str(program))
def test_maxout(self):
program = Program()
with program_guard(program):
x = layers.data(name='x', shape=[3, 5], dtype="float32")
y = layers.data(name='y', shape=[2, 3], dtype="float32")
output = layers.crop(x, shape=y)
self.assertIsNotNone(output)
print(str(program))
if __name__ == '__main__':
unittest.main()
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册