提交 0c13660a 编写于 作者: F fengjiayi

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dev_add_axis

# Design Doc: Computations as Graphs
# Design Doc: Computations as a Graph
A primary goal of the refactorization of PaddlePaddle is a more flexible representation of deep learning computation, in particular, a graph of operators and variables, instead of sequences of layers as before.
......@@ -8,6 +8,8 @@ This document explains that the construction of a graph as three steps:
- construct the backward part
- construct the optimization part
## The Construction of a Graph
Let us take the problem of image classification as a simple example. The application program that trains the model looks like:
```python
......@@ -25,7 +27,9 @@ The first four lines of above program build the forward part of the graph.
![](images/graph_construction_example_forward_only.png)
In particular, the first line `x = layer.data("images")` creates variable x and a Feed operator that copies a column from the minibatch to x. `y = layer.fc(x)` creates not only the FC operator and output variable y, but also two parameters, W and b.
In particular, the first line `x = layer.data("images")` creates variable x and a Feed operator that copies a column from the minibatch to x. `y = layer.fc(x)` creates not only the FC operator and output variable y, but also two parameters, W and b, and the initialization operators.
Initialization operators are kind of "run-once" operators -- the `Run` method increments a class data member counter so to run at most once. By doing so, a parameter wouldn't be initialized repeatedly, say, in every minibatch.
In this example, all operators are created as `OpDesc` protobuf messages, and all variables are `VarDesc`. These protobuf messages are saved in a `BlockDesc` protobuf message.
......@@ -49,3 +53,18 @@ According to the chain rule of gradient computation, `ConstructBackwardGraph` wo
For each parameter, like W and b created by `layer.fc`, marked as double circles in above graphs, `ConstructOptimizationGraph` creates an optimization operator to apply its gradient. Here results in the complete graph:
![](images/graph_construction_example_all.png)
## Block and Graph
The word block and graph are interchangable in the desgin of PaddlePaddle. A [Block[(https://github.com/PaddlePaddle/Paddle/pull/3708) is a metaphore of the code and local variables in a pair of curly braces in programming languages, where operators are like statements or instructions. A graph of operators and variables is a representation of the block.
A Block keeps operators in an array `BlockDesc::ops`
```protobuf
message BlockDesc {
repeated OpDesc ops = 1;
repeated VarDesc vars = 2;
}
```
in the order that there appear in user programs, like the Python program at the beginning of this article. We can imagine that in `ops`, we have some forward operators, followed by some gradient operators, and then some optimization operators.
......@@ -2,6 +2,8 @@ digraph ImageClassificationGraph {
///////// The forward part /////////
FeedX [label="Feed", color=blue, shape=box];
FeedY [label="Feed", color=blue, shape=box];
InitW [label="Init", color=blue, shape=diamond];
Initb [label="Init", color=blue, shape=diamond];
FC [label="FC", color=blue, shape=box];
MSE [label="MSE", color=blue, shape=box];
......@@ -14,6 +16,8 @@ digraph ImageClassificationGraph {
FeedX -> x -> FC -> y -> MSE -> cost [color=blue];
FeedY -> l [color=blue];
InitW -> W [color=blue];
Initb -> b [color=blue];
W -> FC [color=blue];
b -> FC [color=blue];
l -> MSE [color=blue];
......
......@@ -87,3 +87,24 @@ message OpProto {
repeated Attr attrs = 4;
required string comment = 5;
}
enum DataType {
BOOL = 0;
INT16 = 1;
INT32 = 2;
INT64 = 3;
FP16 = 4;
FP32 = 5;
FP64 = 6;
}
message LoDTensorDesc {
required DataType data_type = 1;
repeated int32 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480]
optional int32 lod_level = 3 [ default = 0 ];
}
message VarDesc {
required string name = 1;
optional LoDTensorDesc lod_tensor = 2;
}
......@@ -19,8 +19,8 @@
namespace paddle {
namespace framework {
LOD SliceLevels(const LOD& in, size_t level_begin, size_t level_end) {
LOD new_lod;
LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) {
LoD new_lod;
new_lod.reserve(level_end - level_begin);
for (size_t i = level_begin; i < level_end; i++) {
new_lod.emplace_back(in.at(i));
......@@ -28,10 +28,10 @@ LOD SliceLevels(const LOD& in, size_t level_begin, size_t level_end) {
return new_lod;
}
LOD SliceInLevel(const LOD& in, size_t level, size_t elem_begin,
LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin,
size_t elem_end) {
// slice the lod.
LOD new_lod;
LoD new_lod;
new_lod.reserve(in.size() - level);
auto start = in.at(level)[elem_begin];
auto end = in.at(level)[elem_end];
......@@ -46,13 +46,13 @@ LOD SliceInLevel(const LOD& in, size_t level, size_t elem_begin,
std::transform(new_lod.back().begin(), new_lod.back().end(),
new_lod.back().begin(),
[start](int v) { return v - start; });
PADDLE_ENFORCE_EQ(new_lod.back().front(), 0, "error in slice LOD");
PADDLE_ENFORCE_EQ(new_lod.back().front(), 0, "error in slice LoD");
}
PADDLE_ENFORCE_LE(new_lod.size(), in.size());
return new_lod;
}
bool operator==(const LOD& a, const LOD& b) {
bool operator==(const LoD& a, const LoD& b) {
if (a.size() != b.size()) {
return false;
}
......@@ -72,12 +72,12 @@ bool operator==(const LOD& a, const LOD& b) {
return true;
}
void LODTensor::SliceLevels(size_t level_begin, size_t level_end) {
void LoDTensor::SliceLevels(size_t level_begin, size_t level_end) {
auto new_lod = framework::SliceLevels(lod_, level_begin, level_end);
lod_ = new_lod;
}
void LODTensor::SliceInLevel(size_t level, size_t elem_begin, size_t elem_end) {
void LoDTensor::SliceInLevel(size_t level, size_t elem_begin, size_t elem_end) {
PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
NumLevels());
PADDLE_ENFORCE(elem_begin < NumElements(level),
......
......@@ -35,34 +35,34 @@ template <typename T>
using Vector = thrust::host_vector<T>;
#endif
using LOD = std::vector<Vector<size_t>>;
using LoD = std::vector<Vector<size_t>>;
LOD SliceLevels(const LOD& in, size_t level_begin, size_t level_end);
LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end);
LOD SliceInLevel(const LOD& in, size_t level, size_t elem_begin,
LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin,
size_t elem_end);
bool operator==(const LOD& a, const LOD& b);
bool operator==(const LoD& a, const LoD& b);
/*
* LODTensor (Level of details Tensor)
* LoDTensor (Level of details Tensor)
* see https://en.wikipedia.org/wiki/Level_of_details for reference.
*/
class LODTensor {
class LoDTensor {
public:
LODTensor() {}
LODTensor(const LOD& lod, Tensor* t) : lod_(lod), tensor_(t) {}
LoDTensor() {}
LoDTensor(const LoD& lod, Tensor* t) : lod_(lod), tensor_(t) {}
void set_lod(const LOD& lod) { lod_ = lod; }
void set_lod(const LoD& lod) { lod_ = lod; }
void set_tensor(Tensor* tensor) { tensor_ = tensor; }
Tensor& tensor() { return *tensor_; }
LOD lod() { return lod_; }
LoD lod() { return lod_; }
/*
* Get a element from LOD.
* Get a element from LoD.
*/
size_t lod_element(size_t level, size_t elem) const {
PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
......@@ -74,7 +74,7 @@ class LODTensor {
}
/*
* Number of LODTensor's levels, each level has units of data, for example,
* Number of LoDTensor's levels, each level has units of data, for example,
* in the sentence's view, article, paragraph, sentence are 3 levels.
*/
size_t NumLevels() const { return lod_.size(); }
......@@ -100,7 +100,7 @@ class LODTensor {
void SliceInLevel(size_t level, size_t elem_begin, size_t elem_end);
private:
LOD lod_;
LoD lod_;
Tensor* tensor_; // not owned
};
} // namespace framework
......
......@@ -94,7 +94,7 @@ Let's go on slicing this slice. Its <1,1>-slice is
|||
```
### The General Slicing Algorithm
### The Slicing Algorithm
The algorithm, with over-simplified data structure, is defined as
......@@ -106,17 +106,41 @@ struct LoDTensor {
float* tensor_;
};
LoDTensor Slice(const LoDTensor& lodt, int level, int sequence) {
LoDTensor Slice(const LoDTensor& lodt, int level, int sequence);
```
Let us revisit the example above
}
```
3
3 1 2
3 2 4 1 2 3
||| || |||| | || |||
```
### Slicing the Top Level
Suppose that we want to retrieve the <1,2>-slice
Please be aware that an RNN operator only slices the top level of a LoD Tensor to get the step inputs.
```
2
2 3
|| |||
```
```c++
LoDTensor Slice(const LoDTensor& lodt, int sequence) {
we will need to find out the starting position of this slice by summing over all leaf nodes in `LoD` to the left of the slice, i.e., 3 + 2 + 4 + 1 = 10.
To avoid the traversal of the LoD tree at slcing time, we can do it at the construction time -- instead of saving the lengths of the next level in the LoD tree, we can save the starting offset of the next level. For example, above LoD Tensor can be transformed into
```
0
0 9 10
0 3 5 9 10 12
||| || |||| | || |||
```
We don't really need the 0 on top, so the LoD Tensor could be
}
```
0 9 10
0 3 5 9 10 12
||| || |||| | || |||
```
......@@ -21,7 +21,7 @@
namespace paddle {
namespace framework {
class LODTensorTester : public ::testing::Test {
class LoDTensorTester : public ::testing::Test {
public:
virtual void SetUp() override {
// tensor's batch_size: 30
......@@ -29,7 +29,7 @@ class LODTensorTester : public ::testing::Test {
// 0 10 20
// 0 5 10 15 20
// 0 2 5 7 10 12 15 20
LOD lod;
LoD lod;
lod.push_back(std::vector<size_t>{0, 10, 20});
lod.push_back(std::vector<size_t>{0, 5, 10, 15, 20});
lod.push_back(std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20});
......@@ -47,21 +47,21 @@ class LODTensorTester : public ::testing::Test {
protected:
platform::CPUPlace place;
Tensor tensor;
LODTensor lod_tensor;
LoDTensor lod_tensor;
};
TEST_F(LODTensorTester, NumLevels) { ASSERT_EQ(lod_tensor.NumLevels(), 3UL); }
TEST_F(LoDTensorTester, NumLevels) { ASSERT_EQ(lod_tensor.NumLevels(), 3UL); }
TEST_F(LODTensorTester, NumElements) {
TEST_F(LoDTensorTester, NumElements) {
ASSERT_EQ(lod_tensor.NumElements(0), 2UL);
ASSERT_EQ(lod_tensor.NumElements(1), 4UL);
ASSERT_EQ(lod_tensor.NumElements(2), 8UL);
}
TEST_F(LODTensorTester, SliceLevels) {
TEST_F(LoDTensorTester, SliceLevels) {
// slice 1 level
for (size_t level = 0; level < 3UL; ++level) {
LODTensor new_lod_tensor = lod_tensor;
LoDTensor new_lod_tensor = lod_tensor;
new_lod_tensor.SliceLevels(level, level + 1);
ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor.NumElements(level));
......@@ -70,7 +70,7 @@ TEST_F(LODTensorTester, SliceLevels) {
}
// slice 2 level
for (size_t level = 0; level < 2UL; ++level) {
LODTensor new_lod_tensor = lod_tensor;
LoDTensor new_lod_tensor = lod_tensor;
new_lod_tensor.SliceLevels(level, level + 2);
ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor.NumElements(level));
......@@ -80,9 +80,9 @@ TEST_F(LODTensorTester, SliceLevels) {
}
}
TEST_F(LODTensorTester, SliceInLevel) {
TEST_F(LoDTensorTester, SliceInLevel) {
size_t level = 0;
LODTensor new_lod_tensor = lod_tensor;
LoDTensor new_lod_tensor = lod_tensor;
new_lod_tensor.SliceInLevel(level, 0, 2);
EXPECT_EQ(new_lod_tensor.NumLevels(), 3UL);
EXPECT_EQ(new_lod_tensor.NumElements(0), 2UL);
......
......@@ -80,7 +80,7 @@ TEST(OpRegistry, CreateOp) {
paddle::framework::Scope scope;
paddle::platform::CPUDeviceContext dev_ctx;
op->Run(scope, dev_ctx);
float scale_get = op->GetAttr<float>("scale");
float scale_get = op->Attr<float>("scale");
ASSERT_EQ(scale_get, scale);
}
......@@ -121,7 +121,7 @@ TEST(OpRegistry, DefaultValue) {
paddle::framework::Scope scope;
paddle::platform::CPUDeviceContext dev_ctx;
op->Run(scope, dev_ctx);
ASSERT_EQ(op->GetAttr<float>("scale"), 1.0);
ASSERT_EQ(op->Attr<float>("scale"), 1.0);
}
TEST(OpRegistry, CustomChecker) {
......@@ -172,6 +172,6 @@ TEST(OpRegistry, CustomChecker) {
paddle::platform::CPUDeviceContext dev_ctx;
paddle::framework::Scope scope;
op->Run(scope, dev_ctx);
int test_attr = op->GetAttr<int>("test_attr");
int test_attr = op->Attr<int>("test_attr");
ASSERT_EQ(test_attr, 4);
}
\ No newline at end of file
......@@ -69,7 +69,7 @@ class OperatorBase {
virtual ~OperatorBase() {}
template <typename T>
inline const T& GetAttr(const std::string& name) const {
inline const T& Attr(const std::string& name) const {
PADDLE_ENFORCE(attrs_.count(name) != 0, "%s should be in AttributeMap",
name);
return boost::get<T>(attrs_.at(name));
......@@ -238,8 +238,8 @@ class InferShapeContext {
const Scope& scope() const { return scope_; }
template <typename T>
inline const T& GetAttr(const std::string& name) const {
return op_.GetAttr<T>(name);
inline const T& Attr(const std::string& name) const {
return op_.Attr<T>(name);
}
size_t InputSize(const std::string& name) const {
......
......@@ -19,12 +19,12 @@ template <typename T>
class CPUGaussianRandomKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
float mean = context.GetAttr<float>("mean");
float std = context.GetAttr<float>("std");
float mean = context.Attr<float>("mean");
float std = context.Attr<float>("std");
auto* tensor = context.Output<framework::Tensor>("Out");
T* data = tensor->mutable_data<T>(context.GetPlace());
unsigned int seed = static_cast<unsigned int>(context.GetAttr<int>("seed"));
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
std::minstd_rand engine;
if (seed == 0) {
seed = std::random_device()();
......@@ -45,7 +45,7 @@ class GaussianRandomOp : public framework::OperatorWithKernel {
protected:
void InferShape(const framework::InferShapeContext& context) const override {
auto* tensor = context.Output<framework::Tensor>("Out");
auto dims = GetAttr<std::vector<int>>("dims");
auto dims = Attr<std::vector<int>>("dims");
std::vector<int64_t> temp;
temp.reserve(dims.size());
for (auto dim : dims) {
......
......@@ -42,13 +42,13 @@ class GPUGaussianRandomKernel : public framework::OpKernel {
void Compute(const framework::ExecutionContext& context) const override {
auto* tensor = context.Output<framework::Tensor>("Out");
T* data = tensor->mutable_data<T>(context.GetPlace());
unsigned int seed = static_cast<unsigned int>(context.GetAttr<int>("seed"));
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
if (seed == 0) {
std::random_device rd;
seed = rd();
}
T mean = static_cast<T>(context.GetAttr<float>("mean"));
T std = static_cast<T>(context.GetAttr<float>("std"));
T mean = static_cast<T>(context.Attr<float>("mean"));
T std = static_cast<T>(context.Attr<float>("std"));
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
ssize_t N = framework::product(tensor->dims());
thrust::transform(index_sequence_begin, index_sequence_begin + N,
......
......@@ -109,7 +109,7 @@ void InitArgument(const ArgumentName& name, Argument* arg,
arg->step_scopes = op.Output(name.step_scopes);
auto inlinks = op.Inputs(name.inlinks);
auto inlink_alias = op.GetAttr<std::vector<std::string>>(name.inlink_alias);
auto inlink_alias = op.Attr<std::vector<std::string>>(name.inlink_alias);
PADDLE_ENFORCE(inlinks.size() == inlink_alias.size(),
"the size of inlinks and inlink_alias don't match:%d,%d",
inlinks.size(), inlink_alias.size());
......@@ -121,7 +121,7 @@ void InitArgument(const ArgumentName& name, Argument* arg,
}
auto outlinks = op.Outputs(name.outlinks);
auto outlink_alias = op.GetAttr<std::vector<std::string>>(name.outlink_alias);
auto outlink_alias = op.Attr<std::vector<std::string>>(name.outlink_alias);
PADDLE_ENFORCE(outlinks.size() == outlink_alias.size(),
"the size of outlinks and outlink_alias don't match:%d,%d",
outlinks.size(), outlink_alias.size());
......@@ -135,8 +135,8 @@ void InitArgument(const ArgumentName& name, Argument* arg,
auto boot_memories = op.Inputs(name.boot_memories);
// attributes
auto memories = op.GetAttr<std::vector<std::string>>(name.memories);
auto pre_memories = op.GetAttr<std::vector<std::string>>(name.pre_memories);
auto memories = op.Attr<std::vector<std::string>>(name.memories);
auto pre_memories = op.Attr<std::vector<std::string>>(name.pre_memories);
PADDLE_ENFORCE(memories.size() == boot_memories.size(),
"the size of memories, boot_memories don't match:%d,%d",
......
......@@ -60,7 +60,7 @@ class ScaleGradOp : public NetOp {
AppendOp(framework::OpRegistry::CreateOp(
"scale", {{"X", {Input(framework::GradVarName("Out"))}}},
{{"Out", {Output(framework::GradVarName("X"))}}},
{{"scale", GetAttr<AttrType>("scale")}}));
{{"scale", Attr<AttrType>("scale")}}));
CompleteAddOp(false);
}
};
......
......@@ -27,7 +27,7 @@ class ScaleKernel : public framework::OpKernel {
auto* in = context.Input<framework::Tensor>("X");
tensor->mutable_data<T>(in->place());
auto scale = static_cast<T>(context.GetAttr<AttrType>("scale"));
auto scale = static_cast<T>(context.Attr<AttrType>("scale"));
auto eigen_out = framework::EigenVector<T>::Flatten(*tensor);
auto eigen_in = framework::EigenVector<T>::Flatten(*in);
......
......@@ -31,7 +31,7 @@ class SGDOpKernel : public framework::OpKernel {
auto param = ctx.Input<Tensor>("param");
auto grad = ctx.Input<Tensor>("grad");
auto param_out = ctx.Output<Tensor>("param_out");
float lr = ctx.GetAttr<float>("learning_rate");
float lr = ctx.Attr<float>("learning_rate");
param_out->mutable_data<T>(ctx.GetPlace());
......
......@@ -26,15 +26,15 @@ class CPUUniformRandomKernel : public framework::OpKernel {
void Compute(const framework::ExecutionContext& context) const override {
auto* tensor = context.Output<framework::Tensor>("Out");
T* data = tensor->mutable_data<T>(context.GetPlace());
unsigned int seed = static_cast<unsigned int>(context.GetAttr<int>("seed"));
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
std::minstd_rand engine;
if (seed == 0) {
seed = std::random_device()();
}
engine.seed(seed);
std::uniform_real_distribution<T> dist(
static_cast<T>(context.GetAttr<float>("min")),
static_cast<T>(context.GetAttr<float>("max")));
static_cast<T>(context.Attr<float>("min")),
static_cast<T>(context.Attr<float>("max")));
int64_t size = framework::product(tensor->dims());
for (int64_t i = 0; i < size; ++i) {
data[i] = dist(engine);
......@@ -48,10 +48,10 @@ class UniformRandomOp : public framework::OperatorWithKernel {
protected:
void InferShape(const framework::InferShapeContext& ctx) const override {
PADDLE_ENFORCE(GetAttr<float>("min") < GetAttr<float>("max"),
PADDLE_ENFORCE(Attr<float>("min") < Attr<float>("max"),
"uniform_random's min must less then max");
auto* tensor = ctx.Output<framework::Tensor>("Out");
auto dims = GetAttr<std::vector<int>>("dims");
auto dims = Attr<std::vector<int>>("dims");
std::vector<int64_t> temp;
temp.reserve(dims.size());
for (auto dim : dims) {
......
......@@ -45,13 +45,13 @@ class GPUUniformRandomKernel : public framework::OpKernel {
void Compute(const framework::ExecutionContext& context) const override {
auto* tensor = context.Output<framework::Tensor>("Out");
T* data = tensor->mutable_data<T>(context.GetPlace());
unsigned int seed = static_cast<unsigned int>(context.GetAttr<int>("seed"));
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
if (seed == 0) {
std::random_device rd;
seed = rd();
}
T min = static_cast<T>(context.GetAttr<float>("min"));
T max = static_cast<T>(context.GetAttr<float>("max"));
T min = static_cast<T>(context.Attr<float>("min"));
T max = static_cast<T>(context.Attr<float>("max"));
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
ssize_t N = framework::product(tensor->dims());
thrust::transform(index_sequence_begin, index_sequence_begin + N,
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <vector>
#include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/macros.h"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册