提交 368737e4 编写于 作者: L Luo Tao

Merge branch 'develop' into maxseq

......@@ -127,6 +127,7 @@ include(external/warpctc) # download, build, install warpctc
include(external/any) # download libn::any
include(external/eigen) # download eigen3
include(external/pybind11) # download pybind11
include(external/nccl)
include(cudnn) # set cudnn libraries, must before configure
include(configure) # add paddle env configuration
......@@ -159,7 +160,7 @@ set(EXTERNAL_LIBS
if(WITH_GPU)
list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
if(NOT WITH_DSO)
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
endif(NOT WITH_DSO)
endif(WITH_GPU)
......
......@@ -22,7 +22,7 @@ COPY ./paddle/scripts/docker/root/ /root/
RUN apt-get update && \
apt-get install -y \
git python-pip python-dev openssh-server bison \
git python-pip python-dev openssh-server bison libnccl-dev \
wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
curl sed grep graphviz libjpeg-dev zlib1g-dev \
python-matplotlib gcc-4.8 g++-4.8 \
......
......@@ -62,11 +62,11 @@ else()
FIND_PACKAGE(CUDA REQUIRED)
if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
message(FATAL_ERROR "Paddle need CUDA >= 7.0 to compile")
message(FATAL_ERROR "Paddle needs CUDA >= 7.0 to compile")
endif()
if(NOT CUDNN_FOUND)
message(FATAL_ERROR "Paddle need cudnn to compile")
message(FATAL_ERROR "Paddle needs cudnn to compile")
endif()
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SIMD_FLAG}")
......
......@@ -8,7 +8,7 @@ ExternalProject_Add(
extern_eigen3
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/RLovelett/eigen.git"
GIT_TAG 4e79cb69b9425f5f8c3a84be4350d4ab75b5fd9d
GIT_TAG 70661066beef694cadf6c304d0d07e0758825c10
PREFIX ${EIGEN_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
......
include(ExternalProject)
set(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl)
include_directories(${NCCL_SOURCE_DIR}/src/extern_nccl/src)
if(WITH_DSO)
# If we use DSO, we do not build nccl, just download the dependencies
set(NCCL_BUILD_COMMAND "")
set(NCCL_INSTALL_COMMAND "")
set(NCCL_INSTALL_DIR "")
else()
# otherwise, we build nccl and link it.
set(NCCL_INSTALL_DIR ${THIRD_PARTY_PATH}/install/nccl)
# Note: cuda 8.0 is needed to make nccl
# When cuda is not installed on the system directory, need to set CUDA_HOME to your cuda root
set(NCCL_BUILD_COMMAND "make -j 8")
set(NCCL_INSTALL_COMMAND "make install PREFIX=${NCCL_INSTALL_DIR}")
endif()
ExternalProject_Add(
extern_nccl
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/NVIDIA/nccl.git"
GIT_TAG "v1.3.4-1"
PREFIX "${NCCL_SOURCE_DIR}"
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND "${NCCL_BUILD_COMMAND}"
INSTALL_COMMAND "${NCCL_INSTALL_COMMAND}"
INSTALL_DIR "${NCCL_INSTALL_DIR}"
TEST_COMMAND ""
)
if(WITH_DSO)
if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_nccl_dummy.c)
file(WRITE ${dummyfile} "const char * dummy_nccl = \"${dummyfile}\";")
add_library(nccl STATIC ${dummyfile})
else()
add_library(nccl INTERFACE)
endif()
else()
add_library(nccl STATIC IMPORTED GLOBAL)
set_property(TARGET nccl PROPERTY IMPORTED_LOCATION
${NCCL_INSTALL_DIR}/lib/libnccl_static.a)
endif()
add_dependencies(nccl extern_nccl)
......@@ -125,3 +125,8 @@ simple_attention
:members: simple_attention
:noindex:
dot_product_attention
---------------------
.. automodule:: paddle.v2.networks
:members: dot_product_attention
:noindex:
......@@ -189,7 +189,7 @@ OpDesc {
inputs = {0} // the index of x in vars of BlockDesc above
outputs = {5, 3} // indices of act and hidden_out in vars of BlockDesc above
attrs {
"memories" : {1} // the index of h
"states" : {1} // the index of h
"step_net" : <above step net>
}
};
......
## Survey on Graph
Neural network framework often provides symbolic API for users to write network topology conveniently. This doc manily focus on symbolic API in most popular neural network frameworks, and try to find out how to parse symbolic configuration to a portable file, such as protobuf or json.
### Mxnet
The core concept of symbolic API is `Symbol`. Mxnet implements `Symbol` class in C++, and export to Python using C-API. Please refer to the comments in Mxnet:
`Symbol` is help class used to represent the operator node in Graph.
`Symbol` acts as an interface for building graphs from different components like Variable, Functor and Group. `Symbol` is also exported to python front-end (while Graph is not) to enable quick test and deployment. Conceptually, symbol is the final operation of a graph and thus including all the information required (the graph) to evaluate its output value.
A simple network topology wrote by Symbol is as follows:
```python
def get_symbol(num_classes=10, **kwargs):
data = mx.symbol.Variable('data')
data = mx.symbol.Flatten(data=data)
fc1 = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128)
act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu")
fc2 = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64)
act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu")
fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=num_classes)
mlp = mx.symbol.SoftmaxOutput(data = fc3, name = 'softmax')
return mlp
```
Varible here is actually a Symbol. Every basic Symbol will correspond to one Node, and every Node has its own NodeAttr. There is a op field in NodeAttr class, when a Symbol represents Variable(often input data), the op field is null.
Symbol contains a data member, std::vector<NodeEntry> outputs, and NodeEntry cantains a poniter to Node. We can follow the Node pointer to get all the Graph.
And Symbol can be saved to a Json file.
Here is a detailed example:
```
>>> import mxnet as mx
>>> data = mx.symbol.Variable('data')
>>> print data.debug_str()
Variable:data
>>> data = mx.symbol.Flatten(data=data)
>>> print data.debug_str()
Symbol Outputs:
output[0]=flatten0(0)
Variable:data
--------------------
Op:Flatten, Name=flatten0
Inputs:
arg[0]=data(0) version=0
>>> fc1 = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128)
>>> print fc1.debug_str()
Symbol Outputs:
output[0]=fc1(0)
Variable:data
--------------------
Op:Flatten, Name=flatten0
Inputs:
arg[0]=data(0) version=0
Variable:fc1_weight
Variable:fc1_bias
--------------------
Op:FullyConnected, Name=fc1
Inputs:
arg[0]=flatten0(0)
arg[1]=fc1_weight(0) version=0
arg[2]=fc1_bias(0) version=0
Attrs:
num_hidden=128
```
### TensorFlow
The core concept of symbolic API is `Tensor`. Tensorflow defines `Tensor` in Python. Please refer to the comments in TensorFlow:
A `Tensor` is a symbolic handle to one of the outputs of an `Operation`. It does not hold the values of that operation's output, but instead provides a means of computing those values in a TensorFlow [Session](https://www.tensorflow.org/api_docs/python/tf/Session).
A simple example is as follows:
```python
# Build a dataflow graph.
c = tf.constant([[1.0, 2.0], [3.0, 4.0]])
d = tf.constant([[1.0, 1.0], [0.0, 1.0]])
e = tf.matmul(c, d)
# Construct a `Session` to execute the graph.
sess = tf.Session()
# Execute the graph and store the value that `e` represents in `result`.
result = sess.run(e)
```
The main method of `Tensor` is as follows:
```python
@property
def op(self):
"""The `Operation` that produces this tensor as an output."""
return self._op
@property
def dtype(self):
"""The `DType` of elements in this tensor."""
return self._dtype
@property
def graph(self):
"""The `Graph` that contains this tensor."""
return self._op.graph
@property
def name(self):
"""The string name of this tensor."""
if not self._op.name:
raise ValueError("Operation was not named: %s" % self._op)
return "%s:%d" % (self._op.name, self._value_index)
@property
def device(self):
"""The name of the device on which this tensor will be produced, or None."""
return self._op.device
```
Tensor can be taken as target to run by session. Tensor contains all the information of Graph, and tracks data dependency.
Here is a detailed example:
```
>>> import tensorflow as tf
>>> c = tf.constant([[1.0, 2.0], [3.0, 4.0]])
>>> print c.graph
<tensorflow.python.framework.ops.Graph object at 0x10f256d50>
>>> d = tf.constant([[1.0, 1.0], [0.0, 1.0]])
>>> print d.graph
<tensorflow.python.framework.ops.Graph object at 0x10f256d50>
>>> e = tf.matmul(c, d)
>>> print e.graph
<tensorflow.python.framework.ops.Graph object at 0x10f256d50>
```
### Dynet
The core concept of symbolic API is `Expression`, and Dynet defines `Expression` class in C++.
A simple example is as follows:
```cpp
ComputationGraph cg;
Expression W = parameter(cg, pW);
Expression in = input(cg, xs[i]);
Expression label = input(cg, ys[i]);
Expression pred = W * in;
Expression loss = square(pred - label);
```
The input data and parameter are also represented by Expression. Every basci Expression corresponds to a Node. And input data is also a Node.
Expression has a data member ComputationGraph, and ComputationGraph will be modified in users' configuring process. Expression can be a running target, beacuse Expression contains all dependency.
Here is a detailed example:
write topology in C++
```
ComputationGraph cg;
Expression W = parameter(cg, pW);
cg.print_graphviz();
Expression pred = W * xs[i];
cg.print_graphviz();
Expression loss = square(pred - ys[i]);
cg.print_graphviz();
```
compile and print
```
# first print
digraph G {
rankdir=LR;
nodesep=.05;
N0 [label="v0 = parameters({1}) @ 0x7ffe4de00110"];
}
# second print
digraph G {
rankdir=LR;
nodesep=.05;
N0 [label="v0 = parameters({1}) @ 0x7ffe4de00110"];
N1 [label="v1 = v0 * -0.98"];
N0 -> N1;
}
# third print
digraph G {
rankdir=LR;
nodesep=.05;
N0 [label="v0 = parameters({1}) @ 0x7ffe4de00110"];
N1 [label="v1 = v0 * -0.98"];
N0 -> N1;
N2 [label="v2 = -1.88387 - v1"];
N1 -> N2;
N3 [label="v3 = -v2"];
N2 -> N3;
N4 [label="v4 = square(v3)"];
N3 -> N4;
}
```
### Conclusion
Actually, Symbol/Tensor/Expression in Mxnet/TensorFlow/Dynet are the same level concepts. We use a unified name Expression here, this level concept has following features:
- Users wirte topoloy with symbolic API, and all return value is Expression, including input data and parameter.
- Expression corresponds with a global Graph, and Expression can also be composed.
- Expression tracks all dependency and can be taken as a run target
# Design Doc: Model Format
## Motivation
A model is an output of the training process. One complete model consists of two parts, the **topology** and the **parameters**. In order to support industrial deployment, the model format must be self-complete and must not expose any training source code.
As a result, In PaddlePaddle, the **topology** is represented as a [ProgramDesc](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/doc/design/program.md), which describes the model structure. The **parameters** contain all the trainable weights in the model. We must support large size parameters and efficient serialization/deserialization of parameters.
## Implementation
The topology is saved as a plain text in a detailed self-contain protobuf file.
The parameters are saved as a binary file. As we all know, the protobuf message has a limit of [64M size](https://developers.google.com/protocol-buffers/docs/reference/cpp/google.protobuf.io.coded_stream#CodedInputStream.SetTotalBytesLimit.details). We have done a [benchmark experiment](https://github.com/PaddlePaddle/Paddle/pull/4610), which shows that protobuf is not fit for the task.
As a result, we design a particular format for tensor serialization. By default, an arbitrary tensor in Paddle is a [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/lod_tensor.md), and has a description information proto of [LoDTensorDesc](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto#L99). We save the DescProto as the byte string header. It contains all the necessary information, such as the `dims`, the `name` of the tensor, and the `LoD` information in [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/paddle/framework/lod_tensor.md). A tensor stores values in a continuous memory buffer. For speed we dump the raw memory to disk and save it as the byte string content. So, the binary format of one tensor is,
|HeaderLength|ContentLength|**LoDTensorDesc**|**TensorValue**|
The table below shows a tensor's byte view in detail. Note that all the signed values are written in the little-endian format.
```text
[offset] [type] [description]
0004 4 bytes integer HeaderLength, the length of LoDTensorDesc
0008 4 bytes integer ContentLength, the length of LodTensor Buffer
0009 1 bytes char TensorDesc
00010 1 bytes char TensorDesc
...
00100 1 bytes char TensorValue
00101 1 bytes char TensorValue
00102 1 bytes char TensorValue ..
...
```
## Summary
- We introduce a model format.
- The model represented by its forward-pass computation procedure is saved in a **ProgramDesc** protobuf message.
- A bunch of specified format binary tensors describe the **parameters**.
......@@ -65,20 +65,6 @@ class Optimizer(object):
def __init__(self):
pass
def create_backward_pass(self, loss, parameter_list=None):
"""
create and add gradient Operators in BlockDesc to Compute gradients of `loss`
for parameters in parameter_list
Args:
loss: an variable generated by cost function.
parameter_list: parameters that need to compute gradient and update to optimize the lost.
Returns:
list of (parameters, gradients) pair.
"""
return None
def create_optimization_pass(self, parameters_and_grads):
"""Add optimization operators to update gradients to variables.
......@@ -93,7 +79,7 @@ class Optimizer(object):
def minimize(self, loss, parameter_list):
"""Add operations to minimize `loss` by updating `parameter_list`.
This method combines interface `create_backward_pass()` and
This method combines interface `append_backward_ops()` and
`create_optimization_pass()` into one.
"""
params_grads = self.create_backward_pass(loss, parameter_list)
......
# Prune
## Motivation
We want to support running inference, training and checkpointing in one `ProgramDesc`. We implement
`void Prune(const ProgramDesc* input, ProgramDesc* output)` function, which takes a `ProgramDesc`
and generate a pruned `ProgramDesc`.
## Challenge
Pruning need to support both variables and operators being evaluation targets. Consider the following
different situations.
```python
# Case 1: run foward pass.
cost_np = session.run(target=cost)
# Case 2: run backward passing.
opts_np, _ = session.run(target=[cost, opt])
# Case 3: run checkpointing
_ = session.run(target=checkpoint)
```
## Solution
To support evaluation of operators, we add `is_target` field in the `OpDesc`.
```c++
message OpDesc {
required string type = 3;
repeated Var inputs = 1;
repeated Var outputs = 2;
repeated Attr attrs = 4;
optional bool is_target = 5 [ default = false ];
};
```
To support evaluation of variables, we add [fetch_op](https://github.com/PaddlePaddle/Paddle/pull/4599).
For each variable in the `target`, we insert a `fetch_op` into the `ProgramDesc` with `variable` being
`fetch_op`'s input. Then we also set `fetch_op` is a target.
### Algorithm
If an operator needs to be run, it must fall into one of the following cases:
1. It is the target.
2. It is depended by some other ops, meaning its output is some other op's input.
The first case can be checked by `op_desc.is_traget()` . The second case can be implement as
```c++
bool HasDependentVar(const OpDesc& op_desc, const std::set<string>& dependent_vars) {
for (auto& var : op_desc.outputs()) {
for (auto& argu : var.arguments()) {
if (dependent_vars.count(argu) != 0) {
return true;
}
}
}
return false;
}
```
Then the whole algorithm can be implemented as the following [code](https://github.com/tonyyang-svail/Paddle/blob/prune_impl/paddle/framework/prune.cc).
......@@ -177,9 +177,6 @@ REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, grad_op_class)
REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class)
```
### USE Macros
Make sure the registration process is executed and linked.
---
# Registration Process
1. Write an Op class and its gradient Op class, if required.
......@@ -188,8 +185,6 @@ Make sure the registration process is executed and linked.
1. Call maker class to complete `proto` and `checker`
2. Using the completed `proto` and `checker`, it will add a new key-value pair to the `OpInfoMap`
4. Invoke the `USE` macro in which the Op is used to make sure that it is linked.
---
# Backward Module (1/2)
### Create Backward Operator
......
......@@ -3,17 +3,17 @@
## The Problem Posed
Currently, for each C++ operator class definition, there registers a *gradient operator creator* function, which takes a C++ operator instance and returns the corresponding gradient operator instance.
Currently, for each C++ operator class definition, a *gradient operator creator* function is registered, which takes as input a C++ operator instance and returns the corresponding gradient operator instance.
However, we noticed two problems with the current deisgn:
However, we noticed two problems with the current design:
1. As we decided to separate the *compilation* and *execution* phases, we need to change the creator to take an `OpDesc` protobuf message in a `ProgramDesc` and inserts corresponding `OpDesc` messages into the `ProgramDesc` message.
1. As we decided to separate the *compilation* and the *execution* phases, we need to change the creator to take an `OpDesc` protobuf message in a `ProgramDesc` and inserts corresponding `OpDesc` messages into the `ProgramDesc` message.
1. Some operator's gradient computation requires more than one gradient operators. For example, the gradient of *minus* consists of two operators -- an identity operaotr and a scale operator. So we need to make the registration mechanism to support the mapping from an operator to a set of operators for gradient computation.
1. For some operators, the gradient computation can be written in terms of existing operators. For example, the gradient of *minus* operator consists of two operators -- an *identity* operator followed by a *scale* operator. Hence the registration mechanism needs to support mapping from an operator to a set of operators for the gradient computation.
## The Current Implementation
The C++ class `OpInfos` store in a association map which key is the operator type. The `grad_op_type` indicate associated gradient operator type. Operator can create gradient operator by `OpInfo::creator_` of gradient. The pseudo code is
Instances of the C++ class `OpInfo` are stored an associative map whose key is the operator type. The `grad_op_type` indicates the associated gradient operator type. An operator can create the gradient operator by invoking `OpInfo::creator_` of the gradient operator. The pseudo code is as follows
```cpp
struct OpInfo {
......@@ -31,16 +31,16 @@ OperatorBase* CreateGradientOperator(const OperatorBase& op) {
## Proposed Solution
The mapping relationship between an operator and its gradient operators is a function. The interface of that function is:
The mapping relationship between an operator and its gradient operators is a function. The interface of this function is:
```cpp
// (OpDesc) --> vector<OpDesc>
std::function<std::vector<OpDescBind>(const OpDescBind&)>;
```
The function takes an `OpDescBind` of the forward operator and returns one or many gradient operator descriptions. `OpDescBind` is a C++ wrapper for protobuf message `OpDesc` to manipulate `OpDesc` fast.
The function takes an `OpDescBind` of the forward operator and returns one or many gradient operator descriptions. `OpDescBind` is a C++ wrapper for the protobuf message `OpDesc` for rapid manipulation of `OpDesc`.
The `GradOpDescMaker` will be registered in `OpInfo`, to replace `grad_op_type_` field. The `OpInfo` should be
The `GradOpDescMaker` will be registered in `OpInfo` and will replace the `grad_op_type_` field. The `OpInfo` should look like
```cpp
struct OpInfo {
......@@ -49,7 +49,7 @@ struct OpInfo {
};
```
The `grad_op_maker_ ` is `nullptr` if the operator does not have associated gradient operators.
The `grad_op_maker_ ` is a `nullptr` if the operator does not have any associated gradient operators.
We propose a base class called `GradOpDescMakerBase` to let operator developers generate `Gradient Operators` easily. The public interface of that class is
......@@ -74,7 +74,7 @@ func = [] (const OpDescBind& fwd_op) {
We can write many helper functions since the `GradOpDescMakerBase` is a class now. The basic helper functions get the variables of `Input`, `Output`, `InputGradient` and `OutputGradient` in the forwarding operator.
We should chagne register macros at the same time. In the current solution, there is no difference between forwarding operators and backward operators. So `REGISTER_OP` just register one operator. If the `REGISTER_OPERATOR ` contains `OpProtoAndCheckerMaker` and `GradOpDescMaker`, we just list them in the same macro. It can be done by a macro contains `__VA_ARGS__`.
We should change register macros at the same time. In the current solution, there is no difference between forwarding operators and backward operators. So `REGISTER_OP` just register one operator. If the `REGISTER_OPERATOR ` contains `OpProtoAndCheckerMaker` and `GradOpDescMaker`, we just list them in the same macro. It can be done by a macro contains `__VA_ARGS__`.
The user interface should be
......
# Regularization in PaddlePaddle
## Introduction to Regularization
A central problem in machine learning is how to design an algorithm that will perform well not just on the training data, but also on new data. A frequently faced problem is the problem of **overfitting**, where the model does not make reliable predictions on new unseen data. **Regularization** is the process of introducing additional information in order to prevent overfitting. This is usually done by adding extra penalties to the loss function that restricts the parameter spaces that an optimization algorithm can explore.
### Parameter Norm Penalties
Most common regularization approaches in deep learning are based on limiting the capacity of the models by adding a parameter norm penalty to the objective function `J`. This is given as follows:
<img src="./images/loss_equation.png" align="center"/><br/>
The parameter `alpha` is a hyperparameter that weights the relative contribution of the norm penalty term, `omega`, relative to the standard objective function `J`.
The most commonly used norm penalties are the L2 norm penalty and the L1 norm penalty. These are given as follows:
##### L2 Regularization:
<img src="./images/l2_regularization.png" align="center"/><br/>
##### L1 Regularization
<img src="./images/l1_regularization.png" align="center"/><br/>
A much more detailed mathematical background of regularization can be found [here](http://www.deeplearningbook.org/contents/regularization.html).
## Regularization Survey
A detailed survey of regularization in various deep learning frameworks can be found [here](https://github.com/PaddlePaddle/Paddle/wiki/Regularization-Survey).
## Proposal for Regularization in PaddlePaddle
### Low-Level implementation
In the new design, we propose to create new operations for regularization. For now, we can add 2 ops that correspond to the most frequently used regularizations:
- L2_regularization_op
- L1_regularization_op
These ops can be like any other ops with their own CPU/GPU implementations either using Eigen or separate CPU and GPU kernels. As the initial implementation, we can implement their kernels using Eigen following the abstraction pattern implemented for [Activation Ops](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/accuracy_op.h). This abstraction pattern can make it very easy to implement new regularization schemes other than L1 and L2 norm penalties.
The idea of building ops for regularization is in sync with the refactored Paddle philosophy of using operators to represent any computation unit. The way these ops will be added to the computation graph, will be decided by the [layer functions](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/python_api.md#layer-function) in Python API.
### Computation Graph
Below is an example of a really simple feed forward neural network.
<img src="./images/feed_forward.png" align="center"/><br/>
The Python API will modify this computation graph to add regularization operators. The modified computation graph will look as follows:
<img src="./images/feed_forward_regularized.png" align="center"/><br/>
   
### Python API implementation for Regularization
Using the low level ops, `L2_regularization_op` and `L1_regularization_op`, any user can add regularization to their computation graphs. However, this will require a lot of lines of code and we should design Python APIs that support regularization. An example of such an API can be seen in [Keras](https://keras.io/regularizers/). As per the PaddlePaddle [Python API design](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/python_api.md), the layer functions are responsible for creating operators, operator parameters and variables. Since regularization is a property of parameters, it makes sense to create these in the layer functions.
#### Creation of Regularization ops
There are two possibilities for creating the regularization ops:
1. We create these ops immediately while building the computation graph.
2. We add these ops in a lazy manner, just before the backward, similar to the way the optimization ops are added.
The proposal is to add these ops in a lazy manner just before the backward pass.
#### Storage of Regularization attributes
Since we want to create the regularization ops in a lazy manner, the regularization attributes (type of regularization and weight of regularization penalty) can be stored as attributes of the [`Parameter`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/framework.py#L421) class. This is because regularization is a property of the parameters and storing regularization properties with Parameters also allows for shared parameters.
#### High-level API
In PaddlePaddle Python API, users will primarily rely on [layer functions](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/python_api.md#layer-function) to create neural network layers. Hence, we also need to provide regularization functionality in layer functions. The design of these APIs can be postponed for later right now. A good reference for these APIs can be found in [Keras](https://keras.io/regularizers/) and also by looking at Tensorflow in [`tf.contrib.layers`](https://www.tensorflow.org/api_guides/python/contrib.layers).
# Design Doc: Selected Rows
`SelectedRows` is a kind of sparse tensor data type, which is designed to support `embedding` operators. The gradient of embedding table is a sparse tensor. Only a few rows are non-zero values in that tensor. It is straightforward to represent the sparse tensor by the following sparse tensor data structure:
`SelectedRows` is a type of sparse tensor data type, which is designed to support `embedding` operators. The gradient of embedding table is a sparse tensor. Only a few rows are non-zero values in this tensor. It is straight-forward to represent a sparse tensor by the following sparse tensor data structure:
```cpp
class SelectedRows {
......@@ -11,7 +11,7 @@ class SelectedRows {
};
```
The field `height_` shows the first dimension of `SelectedRows`. The `rows` are the indices of which rows of `SelectedRows` are non-zeros. The `value_` field is an N-dim tensor and shape is `[rows.size() /* NUM_ROWS */, ...]`, which supplies values for each row. The dimension of `SelectedRows` satisfies `[height_] + value_.shape[1:]`.
The field `height_` is the first dimension of `SelectedRows`. The `rows` are the indices of the non-zero rows of `SelectedRows`. The `value_` field is an N-dim tensor of shape `[rows.size() /* NUM_ROWS */, ...]`, which supplies values for each row. The dimension of `SelectedRows` satisfies `[height_] + value_.shape[1:]`.
Suppose that a SelectedRows-typed variable `x` has many rows, but only two of them have values -- row 73 is `[1, 2]` and row 84 is `[3, 4]`, the `SelectedRows` representation would be:
......@@ -25,7 +25,7 @@ x = SelectedRow {
## SelectedRows in Protobuf
`SelectedRows` is a kind of `Variable`. `VarDesc` in protobuf should describe the `SelectedRows` information. Only the tensor dimension of a `SelectedRows` will be described in compile-time since the `rows_` and `value_` are related to training data.
`SelectedRows` is a type of `Variable`. `VarDesc` in protobuf should describe the `SelectedRows` information. Only the tensor dimension of a `SelectedRows` will be described in compile-time because the `rows_` and `value_` are dependent on the training data.
So we use `TensorDesc` to unify `data_type` and `dims`. A LodTensorDesc contains a `TensorDesc` and `lod_level`. The description of `SelectedRows` is a Tensor description.
```proto
......@@ -54,7 +54,7 @@ message VarDesc {
## InferShape for Selected Rows
Just like `LoD` information, `InferShape` method will inference output tensor type as well. The operator should decide whether its output is a `SelectedRows` or `Dense` tensor.
Just like `LoD` information, `InferShape` method will infer the output tensor type as well. The operator should decide whether its output is a `SelectedRows` or `Dense` tensor.
For example, the gradient operator of `TableLookup` will always generate `SelectedRows`. Its `InferShape` method should be like following
......@@ -68,7 +68,7 @@ void TableLookupGrad::InferShape(context) {
## Sparse Operators
There are several operators should be written to support `SelectedRows`. They are:
There are several operators that need to be written to support `SelectedRows`. These are:
1. Operators which generates `SelectedRows` gradient. e.g. Gradient of `TableLookupOp`.
1. Operators which generate `SelectedRows` gradient. e.g. Gradient of `TableLookupOp`.
2. Optimize operators which support `SelectedRows` gradient. e.g. `SGD` or `AdaGrad` for `SelectedRows`. However, there should be only one `SGD` operator. `OpWithKernel::Run` should select a suitable kernel for both `dense` tensor or `SelectedRows`.
......@@ -174,7 +174,7 @@ decoder_inputs = paddle.layer.fc(
1. 两者都是对梯度的截断,但截断时机不同,前者在 :code:`optimzier` 更新网络参数时应用;后者在激活函数反向计算时被调用;
2. 截断对象不同:前者截断可学习参数的梯度,后者截断回传给前层的梯度;
除此之外,还可以通过减小学习或者对数据进行归一化处理来解决这类问题。
除此之外,还可以通过减小学习或者对数据进行归一化处理来解决这类问题。
5. 如何调用 infer 接口输出多个layer的预测结果
-----------------------------------------------
......
# 构建Android平台上的PaddlePaddle库
用户可通过交叉编译的方式,在用户熟悉的开发平台(Linux,Mac OS X和Windows)上编译Android平台上适用的PaddlePaddle库。
用户可通过如下两种方式,交叉编译Android平台上适用的PaddlePaddle库:
- 基于Docker容器的编译方式
- 基于Linux交叉编译环境的编译方式
## 基于Docker容器的编译方式
Docker能在所有主要操作系统(包括Linux,Mac OS X和Windows)上运行,因此,使用基于Docker容器的编译方式,用户可在自己熟悉的开发平台上编译Android平台上适用的PaddlePaddle库。
### 构建PaddlePaddle的Android开发镜像
我们把PaddlePaddle的交叉编译环境打包成一个镜像,称为开发镜像,里面涵盖了交叉编译Android版PaddlePaddle库需要的所有编译工具。
```bash
$ git clone https://github.com/PaddlePaddle/Paddle.git
$ cd Paddle
$ docker build -t username/paddle-android:dev . -f Dockerfile.android
```
### 编译PaddlePaddle C-API库
构建好开发镜像后,即可使用开发镜像来编译Android版PaddlePaddle C-API库。
Android的Docker开发镜像向用户提供两个可配置的参数:
| Argument | Optional Values | Default |
|-----------------|-------------------------|---------|
|`ANDROID_ABI` |`armeabi-v7a, arm64-v8a` | `armeabi-v7a` |
|`ANDROID_API` |`>= 21` | `21` |
- 编译`armeabi-v7a``Android API 21`的PaddlePaddle库
```bash
$ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=armeabi-v7a" -e "ANDROID_API=21" username/paddle-android:dev
```
- 编译`arm64-v8a``Android API 21`的PaddlePaddle库
```bash
$ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=arm64-v8a" -e "ANDROID_API=21" username/paddle-android:dev
```
执行上述`docker run`命令时,容器默认执行[paddle/scripts/docker/build_android.sh](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build_android.sh)脚本。该脚本中记录了交叉编译Android版PaddlePaddle库常用的CMake配置,并且会根据`ANDROID_ABI``ANDROID_API`自动构建独立工具链、进行编译和安装。由于arm64架构要求Android API不小于21。因此当`ANDROID_ABI=arm64-v8a``ANDROID_API<21`时,Docker容器中将默认使用`Android API 21`的编译工具链。用户可以参考下文**配置交叉编译参数**章节,根据个人的需求修改定制Docker容器所执行的脚本。编译安装结束之后,PaddlePaddle的C-API库将被安装到`$PWD/install_android`目录,所依赖的第三方库同时也被安装到`$PWD/install_android/third_party`目录。
## 基于Linux交叉编译环境的编译方式
本文档将以Linux x86-64平台为例,介绍交叉编译Android平台上适用的PaddlePaddle库的方法和步骤。
## 准备交叉编译环境
### 准备交叉编译环境
从源码交叉编译PaddlePaddle,用户需要提前准备好交叉编译环境。Android平台上使用的C/C++交叉编译工具链为[Android NDK](https://developer.android.com/ndk/downloads/index.html?hl=zh-cn),用户可自行前往下载预编译好的版本,也可通过以下命令获取:
......@@ -13,18 +50,27 @@ unzip -q android-ndk-r14b-linux-x86_64.zip
```
Android NDK中包含了所有Android API级别、所有架构(arm/arm64/x86/mips)需要用到的编译工具和系统库。用户可根据自己的编译目标架构、所需支持的最低Android API级别,构建[独立工具链](https://developer.android.google.cn/ndk/guides/standalone_toolchain.html?hl=zh-cn)
比如:
- 构建`armeabi-v7a``Android API 21`的独立工具链:
```bash
your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain.sh \
--arch=arm --platform=android-21 --install-dir=your/path/to/my_standalone_toolchain
--arch=arm --platform=android-21 --install-dir=your/path/to/arm_standalone_toolchain
```
此命令将在your/path/to/my_standalone_toolchain目录生成一套编译工具链,面向架构为32位ARM架构,支持的最小的Android API级别为21,使用的编译器为arm-linux-androideabi-gcc (GCC) 4.9
此命令将在`your/path/to/arm_standalone_toolchain`目录生成一套独立编译工具链,面向架构为32位ARM架构,支持的最小的Android API级别为21,支持编译器`arm-linux-androideabi-gcc (GCC) 4.9``clang 3.8`
注意:**PaddlePaddle要求使用的编译工具链所支持的Andoid API级别不小于21**
- 构建`arm64-v8a``Android API 21`的独立工具链:
```bash
your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain.sh \
--arch=arm64 --platform=android-21 --install-dir=your/path/to/arm64_standalone_toolchain
```
## 配置交叉编译参数
此命令将在`your/path/to/arm64_standalone_toolchain`目录生成一套独立编译工具链,面向架构为64位ARM64架构,支持的最小Android API级别为21,支持编译器`arm-linux-androideabi-gcc (GCC) 4.9``clang 3.8`
注意:**PaddlePaddle要求使用的编译工具链所支持的Android API级别不小于21**
### 配置交叉编译参数
CMake系统对交叉编译提供了支持[cmake-toolchains](https://cmake.org/cmake/help/v3.0/manual/cmake-toolchains.7.html#cross-compiling)。为了简化cmake配置,PaddlePaddle为交叉编译提供了工具链配置文档[cmake/cross_compiling/android.cmake](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/android.cmake),以提供一些默认的编译器和编译参数相关配置。注意,从CMake 3.7版本开始,CMake官方对Android平台的交叉编译提供了通用的支持。PaddlePaddle若检测到用户使用的CMake版本不低于3.7时,将会将用户传进来的配置参数传递CMake系统,交由CMake系统本身来处理。有关参数配置的详细说明见[cmake-toolchains](https://cmake.org/cmake/help/v3.7/manual/cmake-toolchains.7.html#cross-compiling)
......@@ -36,32 +82,57 @@ CMake系统对交叉编译提供了支持[cmake-toolchains](https://cmake.org/cm
Android平台可选配置参数:
- `ANDROID_STANDALONE_TOOLCHAIN`,独立工具链所在的绝对路径,或者相对于构建目录的相对路径。PaddlePaddle的CMake系统将根据该值自动推导和设置需要使用的交叉编译器、sysroot、以及Android API级别;否则,用户需要在cmake时手动设置这些值。无默认值。
- `ANDROID_ABI`,目标架构ABI。目前只支持`armeabi-v7a`,默认值为`armeabi-v7a`
- `ANDROID_TOOLCHAIN`,目标工具链。可设置`gcc/clang`,默认值为`clang`
- CMake 3.7以上,将会始终使用`clang`工具链;CMake 3.7以下,可设置`ANDROID_TOOLCHAIN=gcc`以使用`gcc`工具链。
- Android官方提供的`clang`编译器要求系统支持`GLIBC 2.15`以上。
- `ANDROID_ABI`,目标架构ABI。目前支持`armeabi-v7a``arm64-v8a`,默认值为`armeabi-v7a`
- `ANDROID_NATIVE_API_LEVEL`,工具链的Android API级别。若没有显式设置,PaddlePaddle将根据`ANDROID_STANDALONE_TOOLCHAIN`的值自动推导得到。
- `ANROID_ARM_MODE`,是否使用ARM模式。可设置`ON/OFF`,默认值为`ON`
- `ANDROID_ARM_NEON`,是否使用NEON指令。目前必须设置成`ON`,默认值为`ON`
- `ANROID_ARM_MODE`,是否使用ARM模式。
- `ANDROID_ABI=armeabi-v7a`时,可设置`ON/OFF`,默认值为`ON`
- `ANDROID_ABI=arm64-v8a`时,不需要设置。
- `ANDROID_ARM_NEON`,是否使用NEON指令。
- `ANDROID_ABI=armeabi-v7a`时,可设置`ON/OFF`,默认值为`ON`
- `ANDROID_ABI=arm64-v8a`时,不需要设置。
其他配置参数:
- `USE_EIGEN_FOR_BLAS`,是否使用Eigen库进行矩阵计算。可设置`ON/OFF`,默认值为`OFF`
- `HOST_C/CXX_COMPILER`,宿主机的C/C++编译器。在编译宿主机版protoc可执行文件和目标机版OpenBLAS库时需要用到。默认设置成环境变量`CC`的值;若环境变量`CC`没有设置,则设置成`cc`编译器。
一种常用的cmake配置如下:
常用的cmake配置如下:
```bash
cmake -DCMAKE_SYSTEM_NAME=Android \
-DANDROID_STANDALONE_TOOLCHAIN=your/path/to/my_standalone_toolchain \
-DANDROID_STANDALONE_TOOLCHAIN=your/path/to/arm_standalone_toolchain \
-DANDROID_ABI=armeabi-v7a \
-DANDROID_ARM_NEON=ON \
-DANDROID_ARM_MODE=ON \
-DUSE_EIGEN_FOR_BLAS=ON \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \
-DWITH_C_API=ON \
-DWITH_SWIG_PY=OFF \
..
```
```
cmake -DCMAKE_SYSTEM_NAME=Android \
-DANDROID_STANDALONE_TOOLCHAIN=your/path/to/arm64_standalone_toolchain \
-DANDROID_ABI=arm64-v8a \
-DUSE_EIGEN_FOR_BLAS=OFF \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \
-DWITH_C_API=ON \
-DWITH_SWIG_PY=OFF \
..
```
用户还可根据自己的需求设置其他编译参数。比如希望最小化生成的库的大小,可以设置`CMAKE_BUILD_TYPE``MinSizeRel`;若希望最快的执行速度,则可设置`CMAKE_BUILD_TYPE``Release`。亦可以通过手动设置`CMAKE_C/CXX_FLAGS_MINSIZEREL/RELEASE`来影响PaddlePaddle的编译过程。
## 编译和安装
**性能TIPS**,为了达到最快的计算速度,在CMake参数配置上,有以下建议:
- 设置`CMAKE_BUILD_TYPE``Release`
- 使用`clang`编译工具链
- `armeabi-v7a`时,设置`USE_EIGEN_BLAS=ON`,使用Eigen进行矩阵计算;`arm64-v8a`时,设置`USE_EIGEN_FOR_BLAS=OFF`,使用OpenBLAS进行矩阵计算
### 编译和安装
CMake配置完成后,执行以下命令,PaddlePaddle将自动下载和编译所有第三方依赖库、编译和安装PaddlePaddle预测库。
......@@ -72,4 +143,4 @@ make install
注意:如果你曾经在源码目录下编译过其他平台的PaddlePaddle库,请先使用`rm -rf`命令删除`third_party`目录和`build`目录,以确保所有的第三方依赖库和PaddlePaddle代码都是针对新的CMake配置重新编译的。
执行完安装命令后,`your/path/to/install`目录中会包含`include``lib`目录,其中`include`中包含C-API的头文件,`lib`中包含一个Android版本的库。自此,PaddlePaddle的已经安装完成,用户可将`your/path/to/install`目录下的生成文件用于深度学习相关Android App中,调用方法见C-API文档。
执行完安装命令后,`your/path/to/install`目录中会包含`include``lib``third_party`目录,其中`include`中包含C-API的头文件,`lib`中包含若干个不同Android ABI的PaddlePaddle库,`third_party`中包含所依赖的所有第三方库。自此,PaddlePaddle的已经安装完成,用户可将`your/path/to/install`目录下的生成文件用于深度学习相关Android App中,调用方法见C-API文档。
import gzip
import math
import paddle.v2 as paddle
embsize = 32
hiddensize = 256
N = 5
def wordemb(inlayer):
wordemb = paddle.layer.embedding(
input=inlayer,
size=embsize,
param_attr=paddle.attr.Param(
name="_proj",
initial_std=0.001,
learning_rate=1,
l2_rate=0,
sparse_update=True))
return wordemb
def main():
# for local training
cluster_train = False
if not cluster_train:
paddle.init(use_gpu=False, trainer_count=1)
else:
paddle.init(
use_gpu=False,
trainer_count=2,
port=7164,
ports_num=1,
ports_num_for_sparse=1,
num_gradient_servers=1)
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
firstword = paddle.layer.data(
name="firstw", type=paddle.data_type.integer_value(dict_size))
secondword = paddle.layer.data(
name="secondw", type=paddle.data_type.integer_value(dict_size))
thirdword = paddle.layer.data(
name="thirdw", type=paddle.data_type.integer_value(dict_size))
fourthword = paddle.layer.data(
name="fourthw", type=paddle.data_type.integer_value(dict_size))
nextword = paddle.layer.data(
name="fifthw", type=paddle.data_type.integer_value(dict_size))
Efirst = wordemb(firstword)
Esecond = wordemb(secondword)
Ethird = wordemb(thirdword)
Efourth = wordemb(fourthword)
contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
hidden1 = paddle.layer.fc(input=contextemb,
size=hiddensize,
act=paddle.activation.Sigmoid(),
layer_attr=paddle.attr.Extra(drop_rate=0.5),
bias_attr=paddle.attr.Param(learning_rate=2),
param_attr=paddle.attr.Param(
initial_std=1. / math.sqrt(embsize * 8),
learning_rate=1))
predictword = paddle.layer.fc(input=hidden1,
size=dict_size,
bias_attr=paddle.attr.Param(learning_rate=2),
act=paddle.activation.Softmax())
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
with gzip.open("batch-" + str(event.batch_id) + ".tar.gz",
'w') as f:
trainer.save_parameter_to_tar(f)
result = trainer.test(
paddle.batch(
paddle.dataset.imikolov.test(word_dict, N), 32))
print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics,
result.metrics)
cost = paddle.layer.classification_cost(input=predictword, label=nextword)
parameters = paddle.parameters.create(cost)
adagrad = paddle.optimizer.AdaGrad(
learning_rate=3e-3,
regularization=paddle.optimizer.L2Regularization(8e-4))
trainer = paddle.trainer.SGD(cost,
parameters,
adagrad,
is_local=not cluster_train)
trainer.train(
paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
num_passes=30,
event_handler=event_handler)
if __name__ == '__main__':
main()
import math
import os
import paddle.v2 as paddle
import pickle
embsize = 32
hiddensize = 256
N = 5
cluster_train_file = "./train_data_dir/train/train.txt"
cluster_test_file = "./test_data_dir/test/test.txt"
node_id = os.getenv("OMPI_COMM_WORLD_RANK")
if not node_id:
raise EnvironmentError("must provied OMPI_COMM_WORLD_RANK")
def wordemb(inlayer):
wordemb = paddle.layer.embedding(
input=inlayer,
size=embsize,
param_attr=paddle.attr.Param(
name="_proj",
initial_std=0.001,
learning_rate=1,
l2_rate=0,
sparse_update=True))
return wordemb
def cluster_reader_cluster(filename, node_id):
def cluster_reader():
with open("-".join([filename, "%05d" % int(node_id)]), "r") as f:
for l in f:
csv_data = [int(cell) for cell in l.split(",")]
yield tuple(csv_data)
return cluster_reader
def main():
# get arguments from env
# for local training
TRUTH = ["true", "True", "TRUE", "1", "yes", "Yes", "YES"]
cluster_train = os.getenv('PADDLE_CLUSTER_TRAIN', "False") in TRUTH
use_gpu = os.getenv('PADDLE_INIT_USE_GPU', "False")
if not cluster_train:
paddle.init(
use_gpu=use_gpu,
trainer_count=int(os.getenv("PADDLE_INIT_TRAINER_COUNT", "1")))
else:
paddle.init(
use_gpu=use_gpu,
trainer_count=int(os.getenv("PADDLE_INIT_TRAINER_COUNT", "1")),
port=int(os.getenv("PADDLE_INIT_PORT", "7164")),
ports_num=int(os.getenv("PADDLE_INIT_PORTS_NUM", "1")),
ports_num_for_sparse=int(
os.getenv("PADDLE_INIT_PORTS_NUM_FOR_SPARSE", "1")),
num_gradient_servers=int(
os.getenv("PADDLE_INIT_NUM_GRADIENT_SERVERS", "1")),
trainer_id=int(os.getenv("PADDLE_INIT_TRAINER_ID", "0")),
pservers=os.getenv("PADDLE_INIT_PSERVERS", "127.0.0.1"))
fn = open("thirdparty/wuyi_train_thdpty/word_dict.pickle", "r")
word_dict = pickle.load(fn)
fn.close()
dict_size = len(word_dict)
firstword = paddle.layer.data(
name="firstw", type=paddle.data_type.integer_value(dict_size))
secondword = paddle.layer.data(
name="secondw", type=paddle.data_type.integer_value(dict_size))
thirdword = paddle.layer.data(
name="thirdw", type=paddle.data_type.integer_value(dict_size))
fourthword = paddle.layer.data(
name="fourthw", type=paddle.data_type.integer_value(dict_size))
nextword = paddle.layer.data(
name="fifthw", type=paddle.data_type.integer_value(dict_size))
Efirst = wordemb(firstword)
Esecond = wordemb(secondword)
Ethird = wordemb(thirdword)
Efourth = wordemb(fourthword)
contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
hidden1 = paddle.layer.fc(input=contextemb,
size=hiddensize,
act=paddle.activation.Sigmoid(),
layer_attr=paddle.attr.Extra(drop_rate=0.5),
bias_attr=paddle.attr.Param(learning_rate=2),
param_attr=paddle.attr.Param(
initial_std=1. / math.sqrt(embsize * 8),
learning_rate=1))
predictword = paddle.layer.fc(input=hidden1,
size=dict_size,
bias_attr=paddle.attr.Param(learning_rate=2),
act=paddle.activation.Softmax())
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
result = trainer.test(
paddle.batch(
cluster_reader_cluster(cluster_test_file, node_id), 32))
print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics,
result.metrics)
cost = paddle.layer.classification_cost(input=predictword, label=nextword)
parameters = paddle.parameters.create(cost)
adagrad = paddle.optimizer.AdaGrad(
learning_rate=3e-3,
regularization=paddle.optimizer.L2Regularization(8e-4))
trainer = paddle.trainer.SGD(cost,
parameters,
adagrad,
is_local=not cluster_train)
trainer.train(
paddle.batch(cluster_reader_cluster(cluster_train_file, node_id), 32),
num_passes=30,
event_handler=event_handler)
if __name__ == '__main__':
main()
import paddle.v2 as paddle
import tarfile
import os
import pickle
SPLIT_COUNT = 3
N = 5
def file_len(fd):
for i, l in enumerate(fd):
pass
return i + 1
def split_from_reader_by_line(filename, reader, split_count):
fn = open(filename, "w")
for batch_id, batch_data in enumerate(reader()):
batch_data_str = [str(d) for d in batch_data]
fn.write(",".join(batch_data_str))
fn.write("\n")
fn.close()
fn = open(filename, "r")
total_line_count = file_len(fn)
fn.close()
per_file_lines = total_line_count / split_count + 1
cmd = "split -d -a 5 -l %d %s %s-" % (per_file_lines, filename, filename)
os.system(cmd)
word_dict = paddle.dataset.imikolov.build_dict()
with open("word_dict.pickle", "w") as dict_f:
pickle.dump(word_dict, dict_f)
split_from_reader_by_line("train.txt",
paddle.dataset.imikolov.train(word_dict, N),
SPLIT_COUNT)
split_from_reader_by_line("test.txt",
paddle.dataset.imikolov.test(word_dict, N),
SPLIT_COUNT)
......@@ -25,9 +25,8 @@ import (
"strings"
"time"
log "github.com/inconshreveable/log15"
"github.com/namsral/flag"
log "github.com/sirupsen/logrus"
"github.com/topicai/candy"
"github.com/PaddlePaddle/Paddle/go/master"
"github.com/PaddlePaddle/Paddle/go/utils/networkhelper"
......@@ -41,16 +40,20 @@ func main() {
taskTimeoutMax := flag.Int("task-timeout-max", 3, "max timtout count for each task before it being declared failed task.")
chunkPerTask := flag.Int("chunk-per-task", 10, "chunk per task.")
logLevel := flag.String("log-level", "info",
"log level, possible values: debug, info, warning, error, fatal, panic")
"log level, possible values: debug, info, warn, error, crit")
flag.Parse()
level, e := log.ParseLevel(*logLevel)
candy.Must(e)
lvl, err := log.LvlFromString(*logLevel)
if err != nil {
panic(err)
}
log.SetLevel(level)
log.Root().SetHandler(
log.LvlFilterHandler(lvl, log.CallerStackHandler("%+v", log.StderrHandler)),
)
if *endpoints == "" {
log.Warningln("-endpoints not set, fault tolerance not be enabled.")
log.Warn("-endpoints not set, fault tolerance not be enabled.")
}
var store master.Store
......@@ -58,23 +61,25 @@ func main() {
eps := strings.Split(*endpoints, ",")
ip, err := networkhelper.GetExternalIP()
if err != nil {
log.Fatal(err)
log.Crit("get external ip error", log.Ctx{"error": err})
panic(err)
}
addr := fmt.Sprintf("%s:%d", ip, *port)
store, err = master.NewEtcdClient(eps, addr, master.DefaultLockPath, master.DefaultAddrPath, master.DefaultStatePath, *ttlSec)
if err != nil {
log.Fatal(err)
log.Crit("error creating etcd client.", log.Ctx{"error": err})
panic(err)
}
} else {
store = &master.InMemStore{}
}
shutdown := func() {
log.Infoln("shutting down gracefully")
log.Info("shutting down gracefully")
err := store.Shutdown()
if err != nil {
log.Errorln(err)
log.Error("shutdown error", log.Ctx{"error": err})
}
}
......@@ -86,24 +91,28 @@ func main() {
s, err := master.NewService(store, *chunkPerTask, *taskTimeoutDur, *taskTimeoutMax)
if err != nil {
log.Fatal(err)
log.Crit("error creating new service.", log.Ctx{"error": err})
panic(err)
}
err = rpc.Register(s)
if err != nil {
log.Fatal(err)
log.Crit("error registering to etcd.", log.Ctx{"error": err})
panic(err)
}
rpc.HandleHTTP()
l, err := net.Listen("tcp", ":"+strconv.Itoa(*port))
if err != nil {
log.Fatal(err)
log.Crit("error listing to port", log.Ctx{"error": err, "port": *port})
panic(err)
}
go func() {
err = http.Serve(l, nil)
if err != nil {
log.Fatal(err)
log.Crit("error serving HTTP", log.Ctx{"error": err})
panic(err)
}
}()
......
......@@ -27,11 +27,11 @@ import (
"github.com/topicai/candy"
"github.com/PaddlePaddle/Paddle/go/pserver"
log "github.com/sirupsen/logrus"
log "github.com/inconshreveable/log15"
)
func main() {
port := flag.Int("port", 0, "port of the pserver")
port := flag.Int("port", 8001, "port of the pserver")
index := flag.Int("index", -1, "index of the pserver, set to -1 if use etcd for auto pserver index registry")
etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379",
"comma separated endpoint string for pserver to connect to etcd")
......@@ -41,13 +41,17 @@ func main() {
checkpointPath := flag.String("checkpoint-path", "/checkpoints/", "save checkpoint path")
checkpointInterval := flag.Duration("checkpoint-interval", 600*time.Second, "save checkpoint per interval seconds")
logLevel := flag.String("log-level", "info",
"log level, possible values: debug, info, warning, error, fatal, panic")
"log level, possible values: debug, info, warn, error, crit")
flag.Parse()
level, err := log.ParseLevel(*logLevel)
candy.Must(err)
lvl, err := log.LvlFromString(*logLevel)
if err != nil {
panic(err)
}
log.SetLevel(level)
log.Root().SetHandler(
log.LvlFilterHandler(lvl, log.CallerStackHandler("%+v", log.StderrHandler)),
)
var idx int
......@@ -63,7 +67,7 @@ func main() {
cp, err = pserver.LoadCheckpoint(e, idx)
if err != nil {
if err == pserver.ErrCheckpointNotFound {
log.Infof("Could not find the pserver checkpoint.")
log.Info("Could not find the pserver checkpoint.")
} else {
panic(err)
}
......@@ -71,10 +75,10 @@ func main() {
}
shutdown := func() {
log.Infoln("shutting down gracefully")
log.Info("shutting down gracefully")
sErr := e.Shutdown()
if sErr != nil {
log.Errorln(sErr)
log.Error("error shutting down", log.Ctx{"error": sErr})
}
}
......@@ -95,7 +99,7 @@ func main() {
candy.Must(err)
go func() {
log.Infof("start pserver at port %d", *port)
log.Info("starting pserver", log.Ctx{"port": *port})
err = http.Serve(l, nil)
candy.Must(err)
}()
......
hash: 328e7b9b7306b45e7b9879139a9f86698115981f6283032e1312093a6a6ddb04
updated: 2017-10-16T08:00:23.484693528Z
hash: 51d9e2e46d7fd9173ff11ecada40f7b7728756be18d5e2f032535f66465e6e15
updated: 2017-10-24T15:04:09.987751592-07:00
imports:
- name: github.com/alecthomas/gometalinter
version: bae2f1293d092fd8167939d5108d1b025eaef9de
......@@ -99,6 +99,8 @@ imports:
version: d2709f9f1f31ebcda9651b03077758c1f3a0018c
- name: github.com/ghodss/yaml
version: 0ca9ea5df5451ffdf184b4428c902747c2c11cd7
- name: github.com/go-stack/stack
version: 817915b46b97fd7bb80e8ab6b69f01a53ac3eebf
- name: github.com/gogo/protobuf
version: 909568be09de550ed094403c2bf8a261b5bb730a
subpackages:
......@@ -120,8 +122,14 @@ imports:
- runtime
- runtime/internal
- utilities
- name: github.com/inconshreveable/log15
version: 0decfc6c20d9ca0ad143b0e89dcaa20f810b4fb3
- name: github.com/jonboulle/clockwork
version: 2eee05ed794112d45db504eb05aa693efd2b8b09
- name: github.com/mattn/go-colorable
version: 5411d3eea5978e6cdc258b30de592b60df6aba96
- name: github.com/mattn/go-isatty
version: 57fdcb988a5c543893cc61bce354a6e24ab70022
- name: github.com/matttproud/golang_protobuf_extensions
version: c12348ce28de40eed0136aa2b644d0ee0650e56c
subpackages:
......@@ -179,11 +187,12 @@ imports:
- lex/httplex
- trace
- name: golang.org/x/sys
version: 0f826bdd13b500be0f1d4004938ad978fcc6031e
version: e48874b42435b4347fc52bdee0424a52abc974d7
repo: https://github.com/golang/sys.git
vcs: git
subpackages:
- unix
- windows
- name: golang.org/x/text
version: 836efe42bb4aa16aaa17b9c155d8813d336ed720
repo: https://github.com/golang/text.git
......@@ -222,4 +231,3 @@ testImports:
version: 05e8a0eda380579888eb53c394909df027f06991
subpackages:
- assert
......@@ -26,3 +26,7 @@ import:
version: v1.1.0
- package: github.com/alecthomas/gometalinter
version: v1.2.1
- package: github.com/inconshreveable/log15
version: v2.13
- package: github.com/go-stack/stack
version: v1.6.0
......@@ -35,13 +35,19 @@ import (
"unsafe"
"github.com/PaddlePaddle/Paddle/go/master"
log "github.com/sirupsen/logrus"
log "github.com/inconshreveable/log15"
)
var mu sync.Mutex
var handleMap = make(map[C.paddle_master_client]*master.Client)
var curHandle C.paddle_master_client
func init() {
log.Root().SetHandler(
log.LvlFilterHandler(log.LvlWarn, log.CallerStackHandler("%+v", log.StderrHandler)),
)
}
func add(c *master.Client) C.paddle_master_client {
mu.Lock()
defer mu.Unlock()
......@@ -117,7 +123,7 @@ func paddle_set_dataset(client C.paddle_master_client, path **C.char, size C.int
}
err := c.SetDataset(paths)
if err != nil {
log.Errorln(err)
log.Error("error set dataset", log.Ctx{"error": err})
return C.PADDLE_MASTER_ERROR
}
......@@ -167,7 +173,7 @@ func paddle_request_save_model(client C.paddle_master_client, trainerID string,
c := get(client)
need, err := c.RequestSaveModel(trainerID, time.Duration(blockMS)*time.Millisecond)
if err != nil {
log.Errorln(err)
log.Error("error request save model", log.Ctx{"error": err})
return C.PADDLE_MASTER_ERROR
}
......
......@@ -21,7 +21,7 @@ import (
"github.com/PaddlePaddle/Paddle/go/connection"
"github.com/PaddlePaddle/recordio"
"github.com/coreos/etcd/clientv3"
log "github.com/sirupsen/logrus"
log "github.com/inconshreveable/log15"
)
// Client is the client of the master server.
......@@ -75,7 +75,7 @@ func WithEtcd(endpoints []string, timeout time.Duration) func(*Client) error {
for {
err := f()
if err != nil {
log.Warningln(err)
log.Warn("create etcd client error", log.Ctx{"error": err})
} else {
break
}
......@@ -135,13 +135,13 @@ func (c *Client) getRecords(passID int) {
time.Sleep(time.Second * 3)
continue
}
log.Errorf("getTask error: %s", err)
log.Error("getTask error.", log.Ctx{"error": err})
}
for _, chunk := range t.Chunks {
f, e := os.Open(chunk.Path)
if e != nil {
log.Errorln(e)
log.Error("error open chunk", log.Ctx{"error": e})
continue
}
......@@ -152,12 +152,15 @@ func (c *Client) getRecords(passID int) {
if s.Err() != nil {
c.ch <- record{nil, s.Err()}
log.Errorln(err, chunk.Path)
log.Error(
"error scan chunk",
log.Ctx{"error": err, "path": chunk.Path},
)
}
err = f.Close()
if err != nil {
log.Errorln(err)
log.Error("error close record file", log.Ctx{"error": err})
}
}
......@@ -166,7 +169,7 @@ func (c *Client) getRecords(passID int) {
// correct, but a reasonable approximation.
err = c.taskFinished(t.Meta.ID)
if err != nil {
log.Errorln(err)
log.Error("task finish callback error.", log.Ctx{"error": err})
}
}
}
......@@ -179,12 +182,12 @@ func (c *Client) monitorMaster(addrCh <-chan string) {
if curMaster == "" {
err := c.conn.Close()
if err != nil {
log.Errorln(err)
log.Error("close old master addr error", log.Ctx{"error": err})
}
} else {
err := c.conn.Connect(curMaster)
if err != nil {
log.Errorln(err)
log.Error("connect to new master addr error", log.Ctx{"error": err})
// connect to addr failed, set
// to last known addr in order
......
......@@ -25,8 +25,6 @@ import (
"testing"
"time"
log "github.com/sirupsen/logrus"
"github.com/PaddlePaddle/Paddle/go/connection"
"github.com/PaddlePaddle/recordio"
)
......@@ -36,10 +34,6 @@ const (
chunkPerTask = 10
)
func init() {
log.SetLevel(log.ErrorLevel)
}
func TestGetFinishTask(t *testing.T) {
const path = "/tmp/master_client_test_0"
......
......@@ -20,7 +20,7 @@ import (
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/clientv3/concurrency"
log "github.com/sirupsen/logrus"
log "github.com/inconshreveable/log15"
)
const (
......@@ -44,7 +44,7 @@ type EtcdClient struct {
// NewEtcdClient creates a new EtcdClient.
func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePath string, ttlSec int) (*EtcdClient, error) {
log.Debugf("Connecting to etcd at %v", endpoints)
log.Debug("Connecting to etcd", log.Ctx{"endpoint": endpoints})
cli, err := clientv3.New(clientv3.Config{
Endpoints: endpoints,
DialTimeout: dialTimeout,
......@@ -64,12 +64,12 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat
// one master running, but split-brain problem may cause
// multiple master servers running), and the cluster management
// software will kill one of them.
log.Infof("Trying to acquire lock at %s.", lockPath)
log.Info("Trying to acquire lock.", log.Ctx{"path": lockPath})
err = lock.Lock(context.TODO())
if err != nil {
return nil, err
}
log.Infof("Successfully acquired lock at %s.", lockPath)
log.Info("Successfully acquired lock at %s.", log.Ctx{"path": lockPath})
put := clientv3.OpPut(addrPath, addr)
resp, err := cli.Txn(context.Background()).If(lock.IsOwner()).Then(put).Commit()
......@@ -78,7 +78,8 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat
}
if !resp.Succeeded {
log.Fatal("No longer owns the master lock. Exiting.")
log.Crit("No longer owns the master lock. Exiting.")
panic("No longer owns the master lock. Exiting.")
}
e := &EtcdClient{
......@@ -102,7 +103,7 @@ func (e *EtcdClient) Save(state []byte) error {
}
if !resp.Succeeded {
log.Errorln("No longer owns the lock, trying to lock again")
log.Error("No longer owns the lock, trying to lock again")
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
err := e.lock.Lock(ctx)
cancel()
......@@ -116,9 +117,10 @@ func (e *EtcdClient) Save(state []byte) error {
// to kill current master server. The current
// state is not saved, but the trainer's RPC
// call will fail, so the trainer will retry.
log.Fatalf("Could not acquire the lock at %s: %v. Exiting.", e.lockPath, err)
log.Crit("Could not acquire the lock at %s: %v. Exiting.", log.Ctx{"path": e.lockPath, "error": err})
panic("Could not acquire the lock at %s: %v. Exiting.")
}
log.Infof("Successfully acquired lock at %s.", e.lockPath)
log.Info("Successfully acquired lock at %s.", e.lockPath)
return e.Save(state)
}
......@@ -136,7 +138,7 @@ func (e *EtcdClient) Load() ([]byte, error) {
}
if !resp.Succeeded {
log.Errorln("No longer owns the lock, trying to lock and load again.")
log.Error("No longer owns the lock, trying to lock and load again.")
err = e.lock.Lock(context.Background())
if err != nil {
return nil, err
......@@ -163,7 +165,7 @@ func (e *EtcdClient) Shutdown() error {
if err == nil {
err = newErr
} else {
log.Errorln(newErr)
log.Error("shutdown error", log.Ctx{"error": newErr})
}
}
......@@ -192,7 +194,7 @@ func watchKey(c *clientv3.Client, key string, valChan chan<- string) {
for wresp := range rch {
for _, ev := range wresp.Events {
// if received event is DELETE, the value will be an empty string
log.Infof("received event %s, %q : %q\n", ev.Type, ev.Kv.Key, ev.Kv.Value)
log.Info("received event.", log.Ctx{"type": ev.Type, "key": ev.Kv.Key, "value": ev.Kv.Value})
valChan <- string(ev.Kv.Value)
}
}
......
......@@ -25,7 +25,7 @@ import (
"sync"
"time"
log "github.com/sirupsen/logrus"
log "github.com/inconshreveable/log15"
"github.com/PaddlePaddle/recordio"
)
......@@ -170,11 +170,11 @@ func (s *Service) recover() (bool, error) {
}
if state == nil {
log.Infoln("No state exists, not recovered.")
log.Info("No state exists, not recovered.")
return false, nil
}
log.Infof("Loaded snapshot of size: %d bytes.", len(state))
log.Info("Loaded snapshot.", log.Ctx{"size": len(state)})
gr, err := gzip.NewReader(bytes.NewReader(state))
if err != nil {
return false, err
......@@ -191,11 +191,11 @@ func (s *Service) recover() (bool, error) {
if err != nil {
// Only close failed, recover actually succeed, so
// just log error.
log.Errorln(err)
log.Error("error close recover file.", log.Ctx{"error": err})
}
s.state = tqs
log.WithFields(s.logFields()).Infof("Master recovered from snapshot, scheduling pending task timeout check.")
log.Info("Master recovered from snapshot, scheduling pending task timeout check.", s.logCtx())
for _, t := range s.state.Pending {
time.AfterFunc(s.timeoutDur, s.checkTimeoutFunc(t.Task.Meta.ID, t.Task.Meta.Epoch))
}
......@@ -224,7 +224,7 @@ func (s *Service) snapshot() error {
}
state := buf.Bytes()
log.Infof("Saving snapshot of size: %d bytes.", len(state))
log.Info("Saving snapshot.", log.Ctx{"size bytes": len(state)})
return s.store.Save(state)
}
......@@ -260,7 +260,7 @@ func readChunks(globPaths []string) ([]Chunk, error) {
}
count := index.NumChunks()
log.Infof("readChunks: file %s has %d chunks", path, count)
log.Info("reading chunks.", log.Ctx{"path": path, "num chunks": count})
for i := 0; i < count; i++ {
chunk := Chunk{
Path: path,
......@@ -300,7 +300,7 @@ func (s *Service) SetDataset(globPaths []string, _ *int) error {
err = s.snapshot()
if err != nil {
log.Errorln(err)
log.Error("snapshot error", log.Ctx{"error": err})
return err
}
close(s.ready)
......@@ -320,7 +320,7 @@ func (s *Service) processFailedTask(t taskEntry, epoch int) {
defer func() {
err := s.snapshot()
if err != nil {
log.Errorln(err)
log.Error("snapshot error", log.Ctx{"error": err})
}
}()
......@@ -328,12 +328,12 @@ func (s *Service) processFailedTask(t taskEntry, epoch int) {
t.NumFailure++
if t.NumFailure > s.failureMax {
log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure)
log.Warn("Task failed to many times, discard.", log.Ctx{"task": t.Task, "num failed": t.NumFailure})
s.state.Failed = append(s.state.Failed, t)
return
}
log.Warningf("Task %v failed %d times, re-dispatch.", t.Task, t.NumFailure)
log.Warn("Task failed, re-dispatch.", log.Ctx{"task": t.Task, "num failed": t.NumFailure})
s.state.Todo = append(s.state.Todo, t)
return
}
......@@ -353,8 +353,8 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() {
}
// must be called with lock held.
func (s *Service) logFields() log.Fields {
return log.Fields{
func (s *Service) logCtx() log.Ctx {
return log.Ctx{
"todoLen": len(s.state.Todo),
"pendingLen": len(s.state.Pending),
"doneLen": len(s.state.Done),
......@@ -383,10 +383,10 @@ func (s *Service) GetTask(passID int, task *Task) error {
if len(s.state.Todo) == 0 {
if len(s.state.Done) == 0 && len(s.state.Pending) == 0 {
log.WithFields(s.logFields()).Warningln("All tasks failed, may start next pass")
log.Warn("All tasks failed, may start next pass", s.logCtx())
return ErrAllTaskFailed
}
log.WithFields(s.logFields()).Warningln("No more available task.")
log.Warn("No more available task.", s.logCtx())
return ErrNoMoreAvailable
}
......@@ -400,8 +400,9 @@ func (s *Service) GetTask(passID int, task *Task) error {
}
*task = t.Task
log.WithFields(s.logFields()).Infof("Task #%v dispatched.", t.Task.Meta)
ctx := s.logCtx()
ctx["task meta"] = t.Task.Meta
log.Info("Task dispatched.", ctx)
time.AfterFunc(s.timeoutDur, s.checkTimeoutFunc(t.Task.Meta.ID, t.Task.Meta.Epoch))
return nil
}
......@@ -417,7 +418,9 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error {
t, ok := s.state.Pending[taskID]
if !ok {
log.WithFields(s.logFields()).Warningln("Pending task #%d not found.", taskID)
ctx := s.logCtx()
ctx["task id"] = taskID
log.Warn("Pending task not found.", ctx)
return nil
}
......@@ -426,7 +429,9 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error {
s.state.Done = append(s.state.Done, t)
delete(s.state.Pending, taskID)
log.WithFields(s.logFields()).Infof("Task #%d finished.", taskID)
ctx := s.logCtx()
ctx["task id"] = taskID
log.Info("Task finished.", ctx)
if len(s.state.Todo) == 0 && len(s.state.Pending) == 0 {
// increase master side pass count if all tasks finished
s.state.CurPass++
......@@ -434,12 +439,14 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error {
s.state.Done = []taskEntry{}
// TODO(typhoonzero): deal with failed tasks
s.state.Failed = []taskEntry{}
log.WithFields(s.logFields()).Warningf("all task finished, add new pass data, newpass: %d.", s.state.CurPass)
ctx := s.logCtx()
ctx["new pass"] = s.state.CurPass
log.Warn("all task finished, add new pass data.", ctx)
}
err := s.snapshot()
if err != nil {
log.Errorln(err)
log.Error("snapshot error", log.Ctx{"error": err})
}
return err
}
......@@ -455,7 +462,7 @@ func (s *Service) TaskFailed(meta TaskMeta, dummy *int) error {
t, ok := s.state.Pending[meta.ID]
if !ok {
log.WithFields(s.logFields()).Warningln("TaskFailed:Pending task #%v not found.", t.Task.Meta)
log.Warn("TaskFailed:Pending task not found.", log.Ctx{"task": t.Task.Meta})
return nil
}
......
......@@ -45,9 +45,15 @@ import (
"github.com/PaddlePaddle/Paddle/go/pserver"
"github.com/PaddlePaddle/Paddle/go/pserver/client"
log "github.com/sirupsen/logrus"
log "github.com/inconshreveable/log15"
)
func init() {
log.Root().SetHandler(
log.LvlFilterHandler(log.LvlWarn, log.CallerStackHandler("%+v", log.StderrHandler)),
)
}
var mu sync.Mutex
var handleMap = make(map[C.paddle_pserver_client]*client.Client)
var curHandle C.paddle_pserver_client
......@@ -164,10 +170,13 @@ func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter,
if err != nil {
if err.Error() == pserver.AlreadyInitialized {
log.Warningf("parameter %s already initialized, treat paddle_init_param as successful.", name)
log.Warn(
"parameter already initialized, treat paddle_init_param as successful.",
log.Ctx{"parameter": name},
)
return C.PSERVER_OK
}
log.Errorln(err)
log.Error("error init param", log.Ctx{"error": err})
return C.PSERVER_ERROR
}
......@@ -180,11 +189,11 @@ func paddle_finish_init_params(client C.paddle_pserver_client) C.int {
err := c.FinishInitParams()
if err != nil {
if err.Error() == pserver.AlreadyInitialized {
log.Warningln("parameters already initialized, treat paddle_finish_init_params as successful.")
log.Warn("parameters already initialized, treat paddle_finish_init_params as successful.")
return C.PSERVER_OK
}
log.Errorln(err)
log.Error("error finish init params", log.Ctx{"error": err})
return C.PSERVER_ERROR
}
......@@ -205,7 +214,7 @@ func paddle_send_grads(client C.paddle_pserver_client, grads **C.paddle_gradient
c := get(client)
err := c.SendGrads(gs)
if err != nil {
log.Errorln(err)
log.Error("error send grads", log.Ctx{"error": err})
return C.PSERVER_ERROR
}
......@@ -222,7 +231,7 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter,
c := get(client)
ps, err := c.GetParams(ns)
if err != nil {
log.Errorln(err)
log.Error("error get params", log.Ctx{"error": err})
return C.PSERVER_ERROR
}
......@@ -231,7 +240,13 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter,
for i, p := range ps {
pn[i] = p.Name
}
log.Errorf("pserver returned wrong number of parameters. Requested: %s, returned: %s.", strings.Join(pn, ", "), strings.Join(ns, ", "))
log.Error(
"pserver returned wrong number of parameters.",
log.Ctx{
"Requested": strings.Join(pn, ", "),
"Returned": strings.Join(ns, ", "),
},
)
return C.PSERVER_ERROR
}
......@@ -241,7 +256,13 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter,
for i, p := range ps {
pn[i] = p.Name
}
log.Errorf("pserver returned wrong parameters, or not in requested order. Requested: %s, returned: %s.", strings.Join(pn, ", "), strings.Join(ns, ", "))
log.Error(
"pserver returned wrong parameters, or not in requested order.",
log.Ctx{
"Requested": strings.Join(pn, ", "),
"Returned": strings.Join(ns, ", "),
},
)
return C.PSERVER_ERROR
}
}
......@@ -251,13 +272,19 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter,
param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst))))
if unsafe.Pointer(param) == nil {
log.Errorln("must pre-allocate parameter.")
log.Error("must pre-allocate parameter.")
return C.PSERVER_ERROR
}
if unsafe.Pointer(param.content) != nil {
if int(param.content_len) != len(p.Content) {
log.Errorf("the pre-allocated content len does not match parameter content len. Pre-allocated len: %d, returned len: %d", param.content_len, len(p.Content))
log.Error(
"the pre-allocated content len does not match parameter content len.",
log.Ctx{
"Pre-allocated len": param.content_len,
"Returned len": len(p.Content),
},
)
return C.PSERVER_ERROR
}
}
......
......@@ -22,7 +22,7 @@ import (
"github.com/PaddlePaddle/Paddle/go/connection"
"github.com/PaddlePaddle/Paddle/go/pserver"
log "github.com/sirupsen/logrus"
log "github.com/inconshreveable/log15"
)
// TODO(helin): add RPC call retry logic
......@@ -84,7 +84,7 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) {
if curServers[i].Addr == "" {
err := c.pservers[i].Close()
if err != nil {
log.Errorln(err)
log.Error("error closing connection to pserver", log.Ctx{"error": err})
}
continue
......@@ -92,7 +92,7 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) {
err := c.pservers[i].Connect(curServers[i].Addr)
if err != nil {
log.Errorln(err)
log.Error("error connecting to pserver", log.Ctx{"error": err})
// connect to addr failed, set
// to last known addr in order
......@@ -137,7 +137,7 @@ func (c *Client) FinishInitParams() error {
return err
}
}
return nil
return c.sel.Done()
}
// SendGrads sends gradients to parameter servers for updating
......
......@@ -30,7 +30,7 @@ import (
"github.com/PaddlePaddle/Paddle/go/pserver"
"github.com/PaddlePaddle/Paddle/go/pserver/client"
"github.com/coreos/etcd/clientv3"
log "github.com/sirupsen/logrus"
log "github.com/inconshreveable/log15"
)
const (
......@@ -90,7 +90,7 @@ func initEtcdClient() {
DialTimeout: time.Second * time.Duration(1),
})
if err != nil {
log.Errorf("err %v", err)
log.Error("error init etcd client", log.Ctx{"error": err})
}
ctx, cancel := context.WithTimeout(context.Background(), timeout)
_, err = client.Delete(ctx, pserver.PsDesired)
......
......@@ -25,7 +25,7 @@ import (
"github.com/PaddlePaddle/Paddle/go/pserver"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/clientv3/concurrency"
log "github.com/sirupsen/logrus"
log "github.com/inconshreveable/log15"
)
const (
......@@ -54,26 +54,29 @@ func (e *Etcd) Desired() int {
resp, err := e.client.Get(ctx, pserver.PsDesired)
cancel()
if err != nil {
log.Errorf("Get ps dresire number failed! recnnectiong..., %v", err)
log.Error(
"Get ps dresire number failed! reconnecting...",
log.Ctx{"error": err},
)
time.Sleep(e.timeout)
continue
}
kvs := resp.Kvs
if len(kvs) == 0 {
log.Infoln("Waiting for ps desired registered ...")
log.Info("Waiting for ps desired registered ...")
time.Sleep(e.timeout)
continue
}
psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value))
if err != nil {
log.Errorf("psDesired %d invalid %v", psDesired, err)
log.Error("atoi failed", log.Ctx{"error": err})
time.Sleep(e.timeout)
continue
}
log.Debugf("Get psDesired number: %d", psDesired)
log.Debug("Got psDesired", log.Ctx{"psDesired": psDesired})
break
}
return psDesired
......@@ -88,17 +91,20 @@ func (e *Etcd) List() []Server {
for i := 0; i < psDesired; i++ {
ctx, cancel := context.WithTimeout(context.Background(), e.timeout)
psKey := pserver.PsPath + strconv.Itoa(i)
log.Debugf("checking %s", psKey)
log.Debug("looking for pserver", log.Ctx{"ps key": psKey})
resp, err := e.client.Get(ctx, psKey)
cancel()
if err != nil {
log.Infof("Get psKey= %s error, %v", psKey, err)
log.Info(
"Get psKey error",
log.Ctx{"ps key": psKey, "error": err},
)
time.Sleep(e.timeout)
continue
}
kvs := resp.Kvs
if len(kvs) == 0 {
log.Infof("Waiting for ps addr registered ...")
log.Info("Waiting for ps addr registered ...")
time.Sleep(e.timeout)
continue
}
......@@ -106,11 +112,17 @@ func (e *Etcd) List() []Server {
psAddr := string(resp.Kvs[0].Value)
// TODO(Longfei) check the ps address
if psAddr == "" {
log.Infof("Get psKey = %s, psAddr is empty", psKey)
log.Info(
"Value under psKey is empty",
log.Ctx{"psKey": psKey},
)
time.Sleep(e.timeout)
continue
}
log.Debugf("got value (%s) for key: %s", psAddr, psKey)
log.Debug(
"got psAddr given psKey",
log.Ctx{"psAddr": psAddr, "psKey": psKey},
)
servers[i].Index = i
servers[i].Addr = psAddr
}
......@@ -130,13 +142,13 @@ func NewEtcd(endpoints string) *Etcd {
DialTimeout: defaultEtcdTimeout,
})
if err != nil {
log.Errorf("Init etcd connection failed: %v", err)
log.Error("Init etcd connection failed", log.Ctx{"error": err})
time.Sleep(defaultEtcdTimeout)
continue
}
break
}
log.Infof("Connected to etcd: %s\n", endpoints)
log.Info("Connected to etcd endpoint", log.Ctx{"endpoint": endpoints})
client := &Etcd{
client: cli,
timeout: defaultEtcdTimeout,
......@@ -154,7 +166,7 @@ func (e *Etcd) Select() (bool, error) {
}
lock := concurrency.NewMutex(sess, initLockPath)
log.Infof("Trying to acquire lock at %s.", initLockPath)
log.Info("Trying to acquire lock", log.Ctx{"lock path": initLockPath})
// Do not use timeout context here, since we don't know how
// long does it take for other trainers to initialize the
// parameters.
......@@ -162,7 +174,7 @@ func (e *Etcd) Select() (bool, error) {
if err != nil {
return false, err
}
log.Infof("Successfully acquired lock at %s.", initLockPath)
log.Info("Successfully acquired lock", log.Ctx{"lock path": initLockPath})
get := clientv3.OpGet(initDonePath)
ctx, cancel := context.WithTimeout(context.Background(), e.timeout)
......@@ -181,17 +193,17 @@ func (e *Etcd) Select() (bool, error) {
if len(resp.Kvs) == 0 {
// Key value not set, select current trainer.
e.lock = lock
log.Infoln("Trainer selected.")
log.Info("Trainer selected.")
return true, nil
}
if string(resp.Kvs[0].Value) == initDoneVal {
log.Infoln("Initialization is already done.")
log.Info("Initialization is already done.")
ctx, cancel = context.WithTimeout(context.Background(), e.timeout)
err = lock.Unlock(ctx)
cancel()
if err != nil {
log.Errorln(err)
log.Error("error unlocking", log.Ctx{"error": err})
}
return false, nil
}
......@@ -221,7 +233,7 @@ func (e *Etcd) Done() error {
err = e.lock.Unlock(ctx)
cancel()
if err != nil {
log.Errorln(err)
log.Error("error unlocking", log.Ctx{"error": err})
} else {
e.lock = nil
}
......@@ -244,7 +256,7 @@ func (e *Etcd) Close() error {
cErr := e.client.Close()
if cErr != nil {
if err != nil {
log.Errorln(cErr)
log.Error("error closing etcd client", log.Ctx{"error": cErr})
return err
}
return cErr
......
......@@ -24,7 +24,7 @@ import (
"github.com/PaddlePaddle/Paddle/go/utils/networkhelper"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/clientv3/concurrency"
log "github.com/sirupsen/logrus"
log "github.com/inconshreveable/log15"
)
const (
......@@ -82,19 +82,19 @@ func (e *EtcdClient) Register(port int) (int, error) {
DialTimeout: e.dialTimeout,
})
if err != nil {
log.Errorf("connect to etcd error: %v", err)
log.Error("connect to etcd error", log.Ctx{"error": err})
time.Sleep(retryTimeout)
continue
}
e.client = cli
sess, err := concurrency.NewSession(cli, concurrency.WithTTL(e.ttlSec))
if err != nil {
log.Errorf("create etcd session error: %v", err)
log.Error("create etcd session error", log.Ctx{"error": err})
time.Sleep(retryTimeout)
continue
}
e.sess = sess
log.Debugf("inited client to %s", e.endpoints)
log.Debug("connected to etcd", log.Ctx{"endpoint": e.endpoints})
break
}
// init /ps_desired using transaction, for multiple pservers may want to write
......@@ -104,7 +104,7 @@ func (e *EtcdClient) Register(port int) (int, error) {
_, err := e.initDesiredPservers(ctx, e.numPservers)
cancel()
if err != nil {
log.Warn(err)
log.Warn("pserver init error", log.Ctx{"error": err, "num pservers": e.numPservers})
time.Sleep(retryTimeout)
continue
}
......@@ -119,14 +119,17 @@ func (e *EtcdClient) Register(port int) (int, error) {
resp, err := e.client.Get(ctx, PsDesired)
cancel()
if err != nil {
log.Errorf("getting %s error: %v", PsDesired, err)
log.Error("get etcd key error", log.Ctx{"key": PsDesired, "error": err})
time.Sleep(retryTimeout)
continue
}
if len(resp.Kvs) != 0 {
e.desired, err = strconv.Atoi(string(resp.Kvs[0].Value))
if err != nil {
log.Errorf("value of %s invalid %v\n", PsDesired, err)
log.Error(
"psDesired atoi error",
log.Ctx{"error": err, "value": string(resp.Kvs[0].Value)},
)
time.Sleep(retryTimeout)
// NOTE: wait util ps_desired value change
continue
......@@ -143,7 +146,7 @@ func (e *EtcdClient) Register(port int) (int, error) {
pserverIdx, err = e.registerPserverEtcd(ctx, port)
cancel()
if err != nil {
log.Warn(err)
log.Warn("register pserver on etcd error", log.Ctx{"error": err})
time.Sleep(retryTimeout)
continue
}
......@@ -170,16 +173,17 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context, port int) (int, er
registered := false
for i := 0; i < e.desired; i++ {
psKey := PsPath + strconv.Itoa(i)
log.Debugf("checking %s", psKey)
ps := c.Get(psKey)
log.Debugf("got value (%s) for key: %s", ps, psKey)
log.Debug(
"register pserver got value",
log.Ctx{"value": ps, "key": psKey},
)
if ps == "" {
// find the first id and write info
pserverAddr := e.externalIP + ":" + strconv.Itoa(port)
c.Put(psKey, pserverAddr, clientv3.WithLease(e.sess.Lease()))
log.Debugf("set pserver node %s with value %s", psKey, pserverAddr)
log.Debug("register finished")
log.Debug("register finished", log.Ctx{"key": psKey, "value": pserverAddr})
idx = i
registered = true
break
......@@ -239,7 +243,7 @@ func (e *EtcdClient) Shutdown() error {
newErr := e.client.Close()
if newErr != nil {
if err != nil {
log.Errorln(newErr)
log.Error("shutdown error", log.Ctx{"error": newErr})
} else {
err = newErr
}
......
......@@ -25,7 +25,7 @@ import (
"fmt"
"unsafe"
log "github.com/sirupsen/logrus"
log "github.com/inconshreveable/log15"
)
type optimizer struct {
......@@ -56,12 +56,12 @@ func newOptimizer(paramWithConfigs ParameterWithConfig, State []byte) *optimizer
c := paramWithConfigs.Config
s := State
paramBufferSize := C.size_t(len(p.Content))
log.WithFields(log.Fields{
log.Info("New Optimizer Created with config", log.Ctx{
"ElementType": p.ElementType,
"ParamSize": paramBufferSize,
"ConfigSize": len(c),
"StateSize": len(s),
}).Info("New Optimizer Created with config:")
})
var cbuffer unsafe.Pointer
cbuffer = C.malloc(paramBufferSize)
......@@ -72,21 +72,34 @@ func newOptimizer(paramWithConfigs ParameterWithConfig, State []byte) *optimizer
}
o.config = c
o.opt = C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)),
C.paddle_element_type(p.ElementType), cbuffer, C.int(paramBufferSize), (*C.char)(cstate), C.int(len(s)))
o.opt = C.paddle_create_optimizer(
(*C.uchar)(&c[0]),
C.int(len(c)),
C.paddle_element_type(p.ElementType),
cbuffer,
C.int(paramBufferSize),
(*C.char)(cstate),
C.int(len(s)),
)
return o
}
func (o *optimizer) GetWeights() []byte {
var buffer unsafe.Pointer
// we do not own the buffer, no need to free later.
bufferLen := C.paddle_optimizer_get_weights(o.opt, &buffer)
return cArrayToSlice(buffer, int(bufferLen)*C.sizeof_float)
}
func (o *optimizer) GetStates() []byte {
var cbuffer *C.char
// we owns the state buffer, need to free later.
cbufferLen := C.paddle_optimizer_get_state(o.opt, &cbuffer)
return cArrayToSlice(unsafe.Pointer(cbuffer), int(cbufferLen))
buf := cArrayToSlice(unsafe.Pointer(cbuffer), int(cbufferLen))
cpy := make([]byte, len(buf))
copy(cpy, buf)
C.free(unsafe.Pointer(cbuffer))
return cpy
}
func (o *optimizer) UpdateParameter(g Gradient) error {
......
......@@ -15,8 +15,12 @@
package pserver
import (
"encoding/binary"
"io/ioutil"
"math"
"testing"
"github.com/stretchr/testify/assert"
)
func TestOptimizerCreateRelease(t *testing.T) {
......@@ -36,3 +40,39 @@ func TestOptimizerCreateRelease(t *testing.T) {
o := newOptimizer(param, nil)
o.Cleanup()
}
func float32Bytes(float float32) []byte {
bits := math.Float32bits(float)
bytes := make([]byte, 4)
binary.LittleEndian.PutUint32(bytes, bits)
return bytes
}
func TestOptimizerState(t *testing.T) {
p := Parameter{
Name: "a",
ElementType: Int32,
}
weights := float32Bytes(100)
p.Content = weights
config, err := ioutil.ReadFile("./client/c/test/testdata/optimizer.pb")
if err != nil {
t.Fatalf("read optimizer proto failed")
}
param := ParameterWithConfig{
Param: p,
Config: config,
}
o := newOptimizer(param, nil)
s := o.GetStates()
// clear param content and check if the state is restored.
param.Param.Content = float32Bytes(300)
o1 := newOptimizer(param, s)
s1 := o1.GetStates()
assert.Equal(t, s, s1)
assert.Equal(t, weights, o.GetWeights())
assert.Equal(t, weights, o1.GetWeights())
o.Cleanup()
o1.Cleanup()
}
......@@ -32,7 +32,7 @@ import (
uuid "github.com/satori/go.uuid"
log "github.com/sirupsen/logrus"
log "github.com/inconshreveable/log15"
)
// ElementType is the type of elements of a Parameter.
......@@ -124,6 +124,9 @@ func loadMeta(e *EtcdClient, idx int) (meta checkpointMeta, err error) {
// LoadCheckpoint loads checkpoint from file.
func LoadCheckpoint(e *EtcdClient, idx int) (Checkpoint, error) {
log.Info("Loading checkpoint", "pserver index", idx)
defer traceTime(time.Now(), "load checkpoint")
cpMeta, err := loadMeta(e, idx)
if err != nil {
return nil, err
......@@ -178,6 +181,7 @@ func NewService(idx int, interval time.Duration, path string, client *EtcdClient
func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, _ *int) error {
select {
case <-s.initialized:
log.Warn("init param called but parameters already initialized.")
return errors.New(AlreadyInitialized)
default:
}
......@@ -191,6 +195,13 @@ func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, _ *int) error
// properly memory aligned, if not, make copy to a memory
// aligned region.
s.optMap[paramWithConfigs.Param.Name] = newOptimizer(paramWithConfigs, nil)
log.Info(
"init parameter",
"name", paramWithConfigs.Param.Name,
"config len", len(paramWithConfigs.Config),
"param len", len(paramWithConfigs.Param.Content),
"type", paramWithConfigs.Param.ElementType,
)
return nil
}
......@@ -199,6 +210,7 @@ func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, _ *int) error
func (s *Service) FinishInitParams(_ int, _ *int) error {
select {
case <-s.initialized:
log.Warn("finished init param called but parameters already initialized.")
return errors.New(AlreadyInitialized)
default:
}
......@@ -209,10 +221,12 @@ func (s *Service) FinishInitParams(_ int, _ *int) error {
for range t {
err := s.checkpoint()
if err != nil {
log.Errorln(err)
log.Error("finish init params error", log.Ctx{"error": err})
}
}
}()
log.Info("init parameter finished.")
return nil
}
......@@ -222,6 +236,7 @@ func (s *Service) SendGrad(g Gradient, _ *int) error {
select {
case <-s.initialized:
default:
log.Warn("received gradient before initialization.", "name", g.Name, "size", len(g.Content), "type", g.ElementType)
return errors.New(Uninitialized)
}
......@@ -233,6 +248,7 @@ func (s *Service) SendGrad(g Gradient, _ *int) error {
return fmt.Errorf("parameter: %s does not exist", g.Name)
}
log.Info("received gradient from trainer, updating gradient.", "name", g.Name, "size", len(g.Content), "type", g.ElementType)
return o.UpdateParameter(g)
}
......@@ -244,6 +260,7 @@ func (s *Service) GetParam(name string, parameter *Parameter) error {
opt, ok := s.optMap[name]
if !ok {
log.Warn("trainer wants to get a parameter that does not exist.", "name", name)
return fmt.Errorf("parameter: %s does not exist", name)
}
......@@ -257,12 +274,13 @@ func (s *Service) GetParam(name string, parameter *Parameter) error {
parameter.Name = name
parameter.ElementType = opt.elementType
parameter.Content = opt.GetWeights()
log.Info("sending parameter to the trainer", "name", parameter.Name, "size", len(parameter.Content), "type", parameter.ElementType)
return nil
}
func traceTime(start time.Time, name string) {
elapsed := time.Since(start)
log.Infof("%s took %v", name, elapsed)
log.Info("time elapsed", log.Ctx{"name": name, "elapsed": elapsed})
}
// checkpoint saves checkpoint to disk.
......@@ -270,7 +288,7 @@ func traceTime(start time.Time, name string) {
// checkpoint should be only called after the parameters are
// initialized.
func (s *Service) checkpoint() (err error) {
log.Infoln("Begin save checkpoint.")
log.Info("Begin save checkpoint.")
defer traceTime(time.Now(), "save checkpoint")
s.mu.Lock()
......@@ -297,6 +315,13 @@ func (s *Service) checkpoint() (err error) {
return
}
if _, err = os.Stat(s.checkpointPath); os.IsNotExist(err) {
err = os.MkdirAll(s.checkpointPath, os.ModePerm)
if err != nil {
return
}
}
id := uuid.NewV4().String()
p := path.Join(s.checkpointPath, id)
f, err := os.Create(p)
......@@ -308,7 +333,7 @@ func (s *Service) checkpoint() (err error) {
closeErr := f.Close()
if closeErr != nil {
if err != nil {
log.Errorln(closeErr)
log.Error("error close checkpoint file", log.Ctx{"error": closeErr})
} else {
// Set closeErr as return value.
err = closeErr
......@@ -329,7 +354,7 @@ func (s *Service) checkpoint() (err error) {
oldMeta, err := loadMeta(s.client, s.idx)
if err == ErrCheckpointNotFound {
log.Infoln("Do not have existing checkpoint.")
log.Info("Do not have existing checkpoint.")
err = nil
}
......@@ -361,7 +386,7 @@ func (s *Service) checkpoint() (err error) {
if rmErr != nil {
// log error, but still treat checkpoint as
// successful.
log.Errorln(rmErr)
log.Error("remove old meta file error", log.Ctx{"error": rmErr})
}
}
......
......@@ -28,23 +28,37 @@ add_style_check_target(paddle_capi ${CAPI_SOURCES} ${CAPI_HEADER}
add_dependencies(paddle_capi paddle_proto)
# combine all paddle static libraries together, into libpaddle_capi_whole.a
# user should use PaddleCAPI as -lpaddle_capi_whole
set(PADDLE_CAPI_INFER_LIBS
paddle_utils
paddle_parameter
paddle_math
paddle_cuda
paddle_function
paddle_gserver
paddle_proto)
# TODO: paddle_capi_whole will be removed.
if(MOBILE_INFERENCE)
set(PADDLE_CAPI_INFER_LIBS
paddle_utils
paddle_parameter
paddle_math
paddle_cuda
paddle_function
paddle_gserver
paddle_proto)
else()
set(PADDLE_CAPI_INFER_LIBS
paddle_utils
paddle_parameter
paddle_math
paddle_cuda
paddle_function
paddle_gserver
paddle_proto
paddle_pserver
paddle_network)
endif()
cc_library(paddle_capi_whole DEPS paddle_capi ${PADDLE_CAPI_INFER_LIBS})
# No shared library for iOS
# Link the static library for inference
cc_library(paddle_capi_engine DEPS paddle_capi paddle_utils paddle_parameter paddle_math paddle_cuda paddle_proto)
cc_library(paddle_capi_layers DEPS paddle_function paddle_gserver)
# Link the shared library for inference
if(NOT IOS)
set(LINK_FLAGS " -Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/export.sym -Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/export.map")
# TODO: merge mkl into paddle_capi_shared
set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_capi.map")
add_library(paddle_capi_shared SHARED ${CAPI_SOURCES})
set_target_properties(paddle_capi_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
target_include_directories(paddle_capi_shared PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
......@@ -53,9 +67,10 @@ endif()
# install library & headers.
install(FILES ${CAPI_HEADERS} DESTINATION include/paddle)
install(FILES paddle_capi.map DESTINATION include/paddle)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/config.h DESTINATION include/paddle)
if(ANDROID)
install(TARGETS paddle_capi_whole paddle_capi_shared
install(TARGETS paddle_capi_whole paddle_capi_engine paddle_capi_layers paddle_capi_shared
ARCHIVE DESTINATION lib/${ANDROID_ABI}
LIBRARY DESTINATION lib/${ANDROID_ABI})
execute_process(
......@@ -80,7 +95,7 @@ if(ANDROID)
)"
)
else(ANDROID)
install(TARGETS paddle_capi_whole ARCHIVE DESTINATION lib)
install(TARGETS paddle_capi_whole paddle_capi_engine paddle_capi_layers ARCHIVE DESTINATION lib)
if(NOT IOS)
install(TARGETS paddle_capi_shared DESTINATION lib)
endif()
......
# ddim lib
proto_library(framework_proto SRCS framework.proto)
proto_library(saver_proto SRCS framework.proto saver.proto)
cc_library(ddim SRCS ddim.cc DEPS eigen3)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)
......@@ -7,8 +10,8 @@ cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context)
cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor)
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor)
cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor saver_proto framework_proto)
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor paddle_memory)
nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
cc_test(variable_test SRCS variable_test.cc)
......@@ -16,17 +19,17 @@ cc_test(variable_test SRCS variable_test.cc)
cc_library(scope SRCS scope.cc)
cc_test(scope_test SRCS scope_test.cc DEPS scope)
proto_library(framework_proto SRCS framework.proto)
cc_library(attribute SRCS attribute.cc DEPS framework_proto)
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim op_info)
cc_test(program_desc_test SRCS program_desc_test.cc DEPS proto_desc)
cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)
cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope proto_desc)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim op_info operator)
cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator)
cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
py_proto_compile(framework_py_proto SRCS framework.proto)
......@@ -42,7 +45,10 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog)
cc_library(prune SRCS prune.cc DEPS framework_proto)
cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor)
cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place)
......
......@@ -19,19 +19,7 @@ limitations under the License. */
namespace paddle {
namespace framework {
static ProgramDesc* g_program_desc = nullptr;
ProgramDesc& GetProgramDesc() {
if (g_program_desc == nullptr) {
g_program_desc = new ProgramDesc();
auto root_block = g_program_desc->mutable_blocks()->Add();
root_block->set_idx(0);
root_block->set_parent_idx(-1);
}
return *g_program_desc;
}
Attribute GetAttrValue(const OpDesc::Attr& attr_desc) {
Attribute GetAttrValue(const OpDesc::Attr& attr_desc, ProgramDesc* program) {
switch (attr_desc.type()) {
case framework::AttrType::BOOLEAN: {
return attr_desc.b();
......@@ -74,7 +62,9 @@ Attribute GetAttrValue(const OpDesc::Attr& attr_desc) {
return val;
}
case framework::AttrType::BLOCK: {
return GetProgramDesc().mutable_blocks(attr_desc.block_idx());
PADDLE_ENFORCE(program != nullptr,
"Need to specify ProgramDesc when get a block attr");
return program->mutable_blocks(attr_desc.block_idx());
}
}
PADDLE_ENFORCE(false, "Unknown OpDesc::AttrDesc::type !");
......
......@@ -26,16 +26,13 @@ limitations under the License. */
namespace paddle {
namespace framework {
ProgramDesc& GetProgramDesc();
template <typename T>
inline AttrType AttrTypeID() {
Attribute tmp = T();
return static_cast<AttrType>(tmp.which() - 1);
}
Attribute GetAttrValue(const OpDesc::Attr& attr_desc);
Attribute GetAttrValue(const OpDesc::Attr& attr_desc, ProgramDesc* desc);
class AttrReader {
public:
......
......@@ -21,6 +21,7 @@
#include "paddle/framework/block_desc.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/dynamic_recurrent_op.h"
#include "paddle/operators/net_op.h"
#include "paddle/operators/recurrent_op.h"
......@@ -220,8 +221,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
// process recurrent gradient op as a special operator.
if (forwardOp.Type() == "recurrent") {
// NOTE clean up cycle call somewhere (RNN's stepnet constains itself),
// or
// this will result in infinite loop.
// or this will result in infinite loop.
const auto& rnnop =
*static_cast<const operators::RecurrentOp*>(&forwardOp);
auto rnn_grad_op =
......@@ -231,6 +231,18 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
// create stepnet's gradient op
rnn_grad_op->set_stepnet(
BackwardRecursive(stepnet_op, no_grad_names, grad_to_var, uniq_id));
} else if (forwardOp.Type() == "dynamic_recurrent") {
// NOTE clean up cycle call somewhere (RNN's stepnet constains itself),
// or this will result in infinite loop.
const auto& rnnop =
*static_cast<const operators::DynamicRecurrentOp*>(&forwardOp);
auto rnn_grad_op =
static_cast<operators::DynamicRecurrentGradientOp*>(grad_op.get());
const auto& stepnet_op =
*static_cast<const OperatorBase*>(&rnnop.rnn.GetStepUnit());
// create stepnet's gradient op
rnn_grad_op->rnn.SetStepUnit(
BackwardRecursive(stepnet_op, no_grad_names, grad_to_var, uniq_id));
}
if (net->ops_.empty()) { // Current no aux op is added to network
......@@ -281,12 +293,16 @@ static void CreateGradVarInBlock(
auto ops = block_desc->AllOps();
for (size_t op_index = grad_op_start_index; op_index < ops.size();
++op_index) {
bool need_infer_shape = false;
ForEachVarName(ops[op_index]->Outputs(),
[&](const std::string& grad_var_name) {
if (block_desc->HasVar(grad_var_name)) {
return false;
}
block_desc->Var(grad_var_name);
need_infer_shape = true;
auto var = block_desc->Var(grad_var_name);
// FIXME(qiao) infer the datatype
var->SetDataType(framework::DataType::FP32);
auto it = param_name_map.find(grad_var_name);
if (it == param_name_map.end()) {
return false;
......@@ -298,12 +314,14 @@ static void CreateGradVarInBlock(
grad_record.op_idx_ = static_cast<int>(op_index);
return false; /* not break */
});
if (need_infer_shape) {
ops[op_index]->InferShape(*block_desc);
}
}
}
std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
const std::unique_ptr<OpDescBind>& op_desc,
std::unordered_set<std::string>* no_grad_vars,
const OpDescBind* op_desc, std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var) {
std::vector<std::unique_ptr<OpDescBind>> grad_op_descs;
// All input gradients of forwarding operator do not need to calculate.
......@@ -350,7 +368,7 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var) {
BlockDescBind* cur_block = program_desc.Block(block_idx);
std::deque<std::unique_ptr<OpDescBind>>& op_descs = cur_block->ops_;
std::vector<OpDescBind*> op_descs = cur_block->AllOps();
std::unordered_map<std::string, std::vector<size_t>> dup_out_ops;
size_t grad_desc_idx = 0;
std::vector<std::unique_ptr<OpDescBind>> backward_descs;
......@@ -368,7 +386,7 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
program_desc, step_block_idx, no_grad_vars, grad_to_var);
BlockDescBind* backward_block = program_desc.AppendBlock(*cur_block);
for (auto& ptr : backward_block_op_descs) {
backward_block->ops_.push_back(std::move(ptr));
backward_block->AppendAllocatedOp(std::move(ptr));
}
op_grads[0]->SetBlockAttr("step_block", *backward_block);
}
......@@ -425,17 +443,22 @@ ParamGradInfoMap AppendBackward(
const int root_block_idx = 0;
auto root_block = program_desc.Block(root_block_idx);
auto& all_ops = root_block->ops_;
// insert fill one op for target
// TODO(qiao) add some check to the target.
std::string fill_one_op_out = GradVarName(target.Name());
std::vector<int64_t> target_shape_desc = target.Shape();
std::vector<int> target_shape;
std::transform(target_shape_desc.begin(), target_shape_desc.end(),
std::back_inserter(target_shape),
[](int64_t dim) { return static_cast<int>(dim); });
std::unique_ptr<OpDescBind> fill_one_op(
new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}},
{{"shape", std::vector<int>{1}},
{{"shape", target_shape},
{"value", static_cast<float>(1.0)},
{"data_type", framework::DataType::FP32}}));
all_ops.push_back(std::move(fill_one_op));
size_t forward_op_num = all_ops.size();
root_block->AppendAllocatedOp(std::move(fill_one_op));
size_t forward_op_num = root_block->OpSize();
size_t forward_block_num = program_desc.Size();
// Insert backward operators
......@@ -443,13 +466,22 @@ ParamGradInfoMap AppendBackward(
auto backward_op_descs = MakeBlockBackward(program_desc, root_block_idx,
&no_grad_var_names, &grad_to_var);
std::unordered_map<std::string, GradVarInfo> retv;
// Create Variable
for (auto& ptr : backward_op_descs) {
all_ops.push_back(std::move(ptr));
root_block->AppendAllocatedOp(std::move(ptr));
}
root_block->Var(fill_one_op_out);
// Create Variable
// Create target gradient variable
std::unordered_map<std::string, GradVarInfo> retv;
auto var = root_block->Var(fill_one_op_out);
// FIXME(qiao) infer the data type
var->SetDataType(framework::DataType::FP32);
var->SetShape(target.Shape());
auto& target_grad = retv[target.Name()];
target_grad.name_ = fill_one_op_out;
target_grad.block_idx_ = root_block_idx;
target_grad.op_idx_ = static_cast<int>(forward_op_num);
// create grad_var for all blocks in this program
CreateGradVarInBlock(forward_op_num, grad_to_var, root_block, &retv);
......
......@@ -26,6 +26,20 @@ namespace framework {
using DeviceContext = platform::DeviceContext;
class NoneOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext *ctx) const override {}
};
template <typename Place, typename T>
class NoneKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {}
};
class RowWiseAddOpMaker : public OpProtoAndCheckerMaker {
public:
RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker)
......@@ -215,19 +229,51 @@ class MinusOpMaker : public OpProtoAndCheckerMaker {
namespace f = paddle::framework;
namespace ops = paddle::operators;
using EnforceNotMet = paddle::platform::EnforceNotMet;
REGISTER_OPERATOR(rowwise_add, f::NOP, f::RowWiseAddOpMaker,
// rowwise_add
REGISTER_OPERATOR(rowwise_add, f::NoneOp, f::RowWiseAddOpMaker,
f::RowWiseAddGradMaker);
REGISTER_OPERATOR(rowwise_add_grad, f::NOP);
REGISTER_OP(mul, f::NOP, f::MulOpMaker, mul_grad, f::NOP);
REGISTER_OP(sigmoid, f::NOP, f::SigmoidOpMaker, sigmoid_grad, f::NOP);
REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NOP, f::NoGradOpMaker);
REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NOP, f::FillZeroOpMaker);
REGISTER_OP(sum, f::NOP, f::SumOpMaker, sum_grad, f::NOP);
REGISTER_OP_CPU_KERNEL(rowwise_add,
f::NoneKernel<paddle::platform::CPUPlace, float>);
REGISTER_OPERATOR(rowwise_add_grad, f::NoneOp);
REGISTER_OP_CPU_KERNEL(rowwise_add_grad,
f::NoneKernel<paddle::platform::CPUPlace, float>);
// mul
REGISTER_OP(mul, f::NoneOp, f::MulOpMaker, mul_grad, f::NoneOp);
REGISTER_OP_CPU_KERNEL(mul, f::NoneKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(mul_grad,
f::NoneKernel<paddle::platform::CPUPlace, float>);
// sigmoid
REGISTER_OP(sigmoid, f::NoneOp, f::SigmoidOpMaker, sigmoid_grad, f::NoneOp);
REGISTER_OP_CPU_KERNEL(sigmoid,
f::NoneKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NoneOp, f::NoGradOpMaker);
// fill_zeros_like
REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NoneOp, f::FillZeroOpMaker);
REGISTER_OP_CPU_KERNEL(fill_zeros_like,
f::NoneKernel<paddle::platform::CPUPlace, float>);
// sum
REGISTER_OP(sum, f::NoneOp, f::SumOpMaker, sum_grad, f::NoneOp);
REGISTER_OP_CPU_KERNEL(sum, f::NoneKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(sum_grad,
f::NoneKernel<paddle::platform::CPUPlace, float>);
// fc
REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker);
REGISTER_OP(many_output_op, f::NOP, f::ManyOutputOpMaker, many_output_op_grad,
f::NOP);
REGISTER_OP(mult_in_out, f::NOP, f::MultInOutOpMaker, mult_in_out_grad, f::NOP);
REGISTER_OPERATOR(minus, f::NOP, f::MinusOpMaker, f::MinusGradOpDescMaker);
// many_output_op
REGISTER_OP(many_output_op, f::NoneOp, f::ManyOutputOpMaker,
many_output_op_grad, f::NoneOp);
// mult_in_out
REGISTER_OP(mult_in_out, f::NoneOp, f::MultInOutOpMaker, mult_in_out_grad,
f::NoneOp);
REGISTER_OP_CPU_KERNEL(mult_in_out,
f::NoneKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(mult_in_out_grad,
f::NoneKernel<paddle::platform::CPUPlace, float>);
// minus
REGISTER_OPERATOR(minus, f::NoneOp, f::MinusOpMaker, f::MinusGradOpDescMaker);
REGISTER_OP_CPU_KERNEL(minus, f::NoneKernel<paddle::platform::CPUPlace, float>);
// scale
REGISTER_OPERATOR(scale, f::NoneOp);
REGISTER_OP_CPU_KERNEL(scale, f::NoneKernel<paddle::platform::CPUPlace, float>);
TEST(Backward, simple_op_not_need_grad) {
auto fwd = f::OpRegistry::CreateOp(
......@@ -449,20 +495,10 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
EXPECT_EQ(bwd_net->ops_[2]->Outputs(all).size(), 0UL);
}
// =================================== //
f::ProgramDesc *GetNewProgramDesc() {
auto *program_desc = new f::ProgramDesc();
auto *root_block = program_desc->add_blocks();
root_block->set_idx(0);
root_block->set_parent_idx(-1);
return program_desc;
}
TEST(Backward, simple_single_op) {
f::ProgramDesc *program_desc = GetNewProgramDesc();
f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
f::ProgramDescBind program;
f::BlockDescBind *block = program.Block(0);
f::OpDescBind *op = block->AppendOp();
op->SetType("rowwise_add");
op->SetInput("X", {"x"});
......@@ -487,7 +523,7 @@ TEST(Backward, simple_single_op) {
EXPECT_EQ(grad_op->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b")}));
EXPECT_EQ(var_to_grad.size(), 2UL);
EXPECT_EQ(var_to_grad.size(), 3UL);
EXPECT_EQ(var_to_grad.at("b"), f::GradVarInfo(f::GradVarName("b"), 0, 2));
EXPECT_EQ(var_to_grad.at("x"), f::GradVarInfo(f::GradVarName("x"), 0, 2));
......@@ -496,8 +532,7 @@ TEST(Backward, simple_single_op) {
}
TEST(Backward, default_attribute) {
f::ProgramDesc *program_desc = GetNewProgramDesc();
f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
f::ProgramDescBind program;
f::BlockDescBind *block = program.Block(0);
f::OpDescBind *op = block->AppendOp();
op->SetType("mul");
......@@ -523,8 +558,7 @@ TEST(Backward, default_attribute) {
}
TEST(Backward, simple_mult_op) {
f::ProgramDesc *program_desc = GetNewProgramDesc();
f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
f::ProgramDescBind program;
f::BlockDescBind *block = program.Block(0);
f::OpDescBind *op1 = block->AppendOp();
op1->SetType("rowwise_add");
......@@ -588,7 +622,7 @@ TEST(Backward, simple_mult_op) {
EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b3")}));
EXPECT_EQ(var_to_grad.size(), 6UL);
EXPECT_EQ(var_to_grad.size(), 7UL);
EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
EXPECT_EQ(var_to_grad.at("out1"),
......@@ -607,8 +641,7 @@ TEST(Backward, simple_mult_op) {
}
TEST(Backward, intermedia_var_no_grad) {
f::ProgramDesc *program_desc = GetNewProgramDesc();
f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
f::ProgramDescBind program;
f::BlockDescBind *block = program.Block(0);
f::OpDescBind *op1 = block->AppendOp();
op1->SetType("rowwise_add");
......@@ -666,7 +699,7 @@ TEST(Backward, intermedia_var_no_grad) {
std::vector<std::string>({f::GradVarName("out1")}));
EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector<std::string>());
EXPECT_EQ(var_to_grad.size(), 3UL);
EXPECT_EQ(var_to_grad.size(), 4UL);
EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
EXPECT_EQ(var_to_grad.at("out1"),
......@@ -678,8 +711,7 @@ TEST(Backward, intermedia_var_no_grad) {
}
TEST(Backward, var_no_grad) {
f::ProgramDesc *program_desc = GetNewProgramDesc();
f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
f::ProgramDescBind program;
f::BlockDescBind *block = program.Block(0);
f::OpDescBind *op1 = block->AppendOp();
op1->SetType("mult_in_out");
......@@ -744,7 +776,7 @@ TEST(Backward, var_no_grad) {
EXPECT_EQ(grad_op1->Output(f::GradVarName("H")),
std::vector<std::string>({f::GradVarName("h1")}));
EXPECT_EQ(var_to_grad.size(), 3UL);
EXPECT_EQ(var_to_grad.size(), 4UL);
EXPECT_EQ(var_to_grad.at("y1"), f::GradVarInfo(f::GradVarName("y1"), 0, 3));
EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 5));
EXPECT_EQ(var_to_grad.at("h1"), f::GradVarInfo(f::GradVarName("h1"), 0, 5));
......@@ -755,8 +787,7 @@ TEST(Backward, var_no_grad) {
}
TEST(Backward, shared_var) {
f::ProgramDesc *program_desc = GetNewProgramDesc();
f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
f::ProgramDescBind program;
f::BlockDescBind *block = program.Block(0);
f::OpDescBind *op1 = block->AppendOp();
op1->SetType("rowwise_add");
......@@ -830,7 +861,7 @@ TEST(Backward, shared_var) {
EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b1")}));
EXPECT_EQ(var_to_grad.size(), 5UL);
EXPECT_EQ(var_to_grad.size(), 6UL);
EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4));
EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5));
EXPECT_EQ(var_to_grad.at("out1"),
......@@ -846,8 +877,7 @@ TEST(Backward, shared_var) {
}
TEST(Backward, half_backward) {
f::ProgramDesc *program_desc = GetNewProgramDesc();
f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
f::ProgramDescBind program;
f::BlockDescBind *block = program.Block(0);
auto *op1 = block->AppendOp();
op1->SetType("minus");
......@@ -863,7 +893,7 @@ TEST(Backward, half_backward) {
auto ops = block->AllOps();
ASSERT_EQ(3UL, ops.size());
EXPECT_EQ(var_to_grad.size(), 1UL);
EXPECT_EQ(var_to_grad.size(), 2UL);
EXPECT_EQ(var_to_grad.at("a"),
f::GradVarInfo(f::GradVarName("a"), 0, forward_len + 1));
}
......@@ -19,11 +19,11 @@ namespace paddle {
namespace framework {
VarDescBind *BlockDescBind::Var(const std::string &name) {
need_update_ = true;
auto it = vars_.find(name);
if (it != vars_.end()) {
return it->second.get();
}
need_update_ = true;
auto *var = new VarDescBind(name);
vars_[name].reset(var);
return var;
......@@ -41,6 +41,19 @@ bool BlockDescBind::HasVar(const std::string &name) const {
return vars_.find(name) != vars_.end();
}
VarDescBind *BlockDescBind::FindVarRecursive(const std::string &name) const {
auto it = vars_.find(name);
if (it == vars_.end()) {
return Parent() == kNoneBlockIndex ? nullptr
: ParentBlock()->FindVarRecursive(name);
}
return it->second.get();
}
bool BlockDescBind::HasVarRecursive(const std::string &name) const {
return FindVarRecursive(name) != nullptr;
}
std::vector<VarDescBind *> BlockDescBind::AllVars() const {
std::vector<VarDescBind *> res;
for (const auto &p : vars_) {
......@@ -55,6 +68,11 @@ OpDescBind *BlockDescBind::AppendOp() {
return ops_.back().get();
}
void BlockDescBind::AppendAllocatedOp(std::unique_ptr<OpDescBind> &&op_desc) {
need_update_ = true;
ops_.emplace_back(std::move(op_desc));
}
OpDescBind *BlockDescBind::PrependOp() {
need_update_ = true;
ops_.emplace_front(new OpDescBind());
......@@ -70,15 +88,19 @@ std::vector<OpDescBind *> BlockDescBind::AllOps() const {
}
void BlockDescBind::Flush() {
for (auto &op_desc : ops_) {
op_desc->Flush();
}
if (need_update_) {
auto &op_field = *this->desc_->mutable_ops();
op_field.Clear();
this->ClearPBOps();
op_field.Reserve(static_cast<int>(ops_.size()));
for (auto &op_desc : ops_) {
op_field.AddAllocated(op_desc->Proto());
}
auto &var_field = *this->desc_->mutable_vars();
var_field.Clear();
this->ClearPBVars();
var_field.Reserve(static_cast<int>(vars_.size()));
for (auto &var_desc : vars_) {
var_field.AddAllocated(var_desc.second->Proto());
......@@ -88,7 +110,7 @@ void BlockDescBind::Flush() {
}
BlockDescBind *BlockDescBind::ParentBlock() const {
if (this->desc_->parent_idx() == -1) {
if (this->desc_->parent_idx() == kNoneBlockIndex) {
return nullptr;
}
return prog_->Block(static_cast<size_t>(this->desc_->parent_idx()));
......@@ -98,6 +120,35 @@ BlockDesc *BlockDescBind::Proto() {
Flush();
return desc_;
}
BlockDescBind::BlockDescBind(const BlockDescBind &other, BlockDesc *desc,
ProgramDescBind *prog)
: prog_(prog), desc_(desc) {
need_update_ = true;
for (auto &op : other.ops_) {
ops_.emplace_back(new OpDescBind(*op));
}
for (auto &it : other.vars_) {
auto *var = new VarDescBind(*it.second);
vars_[it.first].reset(var);
}
}
void BlockDescBind::ClearPBOps() {
auto ops = this->desc_->mutable_ops();
while (!ops->empty()) {
// we do not own the OpDesc, so release the ownership.
ops->ReleaseLast();
}
}
void BlockDescBind::ClearPBVars() {
auto vars = this->desc_->mutable_vars();
while (!vars->empty()) {
// we do not own the VarDesc, so release the ownership.
vars->ReleaseLast();
}
}
} // namespace framework
} // namespace paddle
......@@ -16,9 +16,12 @@ limitations under the License. */
#include <deque>
#include <memory>
#include <set>
#include <unordered_map>
#include <vector>
#include "paddle/framework/op_desc.h"
#include "paddle/framework/proto_desc.h"
#include "paddle/framework/var_desc.h"
#include "paddle/platform/macros.h"
......@@ -36,6 +39,14 @@ class BlockDescBind {
BlockDescBind(ProgramDescBind *prog, BlockDesc *desc)
: prog_(prog), desc_(desc), need_update_(false) {}
BlockDescBind(const BlockDescBind &other, BlockDesc *desc,
ProgramDescBind *prog);
~BlockDescBind() {
this->ClearPBVars();
this->ClearPBOps();
}
int32_t ID() const { return desc_->idx(); }
int32_t Parent() const { return desc_->parent_idx(); }
......@@ -46,23 +57,43 @@ class BlockDescBind {
bool HasVar(const std::string &var_name) const;
VarDescBind *FindVarRecursive(const std::string &name_bytes) const;
bool HasVarRecursive(const std::string &var_name) const;
std::set<std::string> LocalVarNames() const {
std::set<std::string> var_names;
for (auto &var : vars_) {
var_names.insert(var.first);
}
return var_names;
}
std::vector<VarDescBind *> AllVars() const;
BlockDescBind *ParentBlock() const;
OpDescBind *AppendOp();
void AppendAllocatedOp(std::unique_ptr<OpDescBind> &&op_desc);
OpDescBind *PrependOp();
std::vector<OpDescBind *> AllOps() const;
size_t OpSize() const { return ops_.size(); }
OpDescBind *Op(int idx) { return ops_.at(idx).get(); }
void Flush();
BlockDesc *Proto();
// FIXME(yuyang18): backward will access private data of BlockDesc.
// Mark it public temporary. We can fix it later.
public:
private:
void ClearPBOps();
void ClearPBVars();
private:
ProgramDescBind *prog_; // not_own
BlockDesc *desc_; // not_own
bool need_update_;
......
......@@ -26,6 +26,8 @@ inline DataType ToDataType(std::type_index type) {
return DataType::FP64;
} else if (typeid(int).hash_code() == type.hash_code()) {
return DataType::INT32;
} else if (typeid(int64_t).hash_code() == type.hash_code()) {
return DataType::INT64;
} else {
PADDLE_THROW("Not supported");
}
......
......@@ -64,99 +64,27 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id) {
auto& block = pdesc.blocks(block_id);
auto& device = device_contexts_[0];
// Instantiate all the vars in the global scope
for (auto& var : block.vars()) {
scope->Var(var.name());
}
Scope& local_scope = scope->NewScope();
std::vector<bool> should_run = Prune(pdesc, block_id);
PADDLE_ENFORCE_EQ(should_run.size(), static_cast<size_t>(block.ops_size()));
for (size_t i = 0; i < should_run.size(); ++i) {
if (should_run[i]) {
for (auto& var : block.ops(i).outputs()) {
for (auto& argu : var.arguments()) {
if (local_scope.FindVar(argu) == nullptr) {
local_scope.Var(argu);
}
}
}
auto op = paddle::framework::OpRegistry::CreateOp(block.ops(i));
op->Run(local_scope, *device);
}
}
// TODO(tonyyang-svail):
// - Destroy local_scope
}
std::vector<bool> Prune(const ProgramDesc& pdesc, int block_id) {
// TODO(tonyyang-svail):
// - will change to use multiple blocks for RNN op and Cond Op
auto& block = pdesc.blocks(block_id);
auto& ops = block.ops();
bool expect_feed = true;
for (auto& op_desc : ops) {
PADDLE_ENFORCE(op_desc.type() != kFeedOpType || expect_feed,
"All FeedOps are at the beginning of the ProgramDesc");
expect_feed = (op_desc.type() == kFeedOpType);
}
bool expect_fetch = true;
for (auto op_iter = ops.rbegin(); op_iter != ops.rend(); ++op_iter) {
auto& op_desc = *op_iter;
PADDLE_ENFORCE(op_desc.type() != kFetchOpType || expect_fetch,
"All FetchOps must at the end of the ProgramDesc");
expect_fetch = (op_desc.type() == kFetchOpType);
}
std::set<std::string> dependent_vars;
std::vector<bool> should_run;
for (auto op_iter = ops.rbegin(); op_iter != ops.rend(); ++op_iter) {
auto& op_desc = *op_iter;
bool found_dependent_vars = false;
for (auto& var : op_desc.outputs()) {
for (auto& argu : var.arguments()) {
if (dependent_vars.count(argu) != 0) {
found_dependent_vars = true;
}
}
}
if (op_desc.type() == kFetchOpType || found_dependent_vars) {
// erase its output to the dependency graph
for (auto& var : op_desc.outputs()) {
for (auto& argu : var.arguments()) {
dependent_vars.erase(argu);
}
}
// insert its input to the dependency graph
for (auto& var : op_desc.inputs()) {
for (auto& argu : var.arguments()) {
dependent_vars.insert(argu);
}
}
should_run.push_back(true);
for (auto& var : block.vars()) {
if (var.persistable()) {
auto* ptr = scope->Var(var.name());
VLOG(3) << "Create Variable " << var.name()
<< " global, which pointer is " << ptr;
} else {
should_run.push_back(false);
auto* ptr = local_scope.Var(var.name());
VLOG(3) << "Create Variable " << var.name()
<< " locally, which pointer is " << ptr;
}
}
// TODO(tonyyang-svail):
// - check this after integration of Init
// PADDLE_ENFORCE(dependent_vars.empty());
// since we are traversing the ProgramDesc in reverse order
// we reverse the should_run vector
std::reverse(should_run.begin(), should_run.end());
for (auto& op_desc : block.ops()) {
auto op = paddle::framework::OpRegistry::CreateOp(
op_desc, const_cast<ProgramDesc*>(&pdesc));
op->Run(local_scope, *device);
}
return should_run;
scope->DeleteScope(&local_scope);
}
} // namespace framework
......
......@@ -40,16 +40,5 @@ class Executor {
std::vector<platform::DeviceContext*> device_contexts_;
};
/* @Brief
* Pruning the graph
*
* @param
* ProgramDesc
*
* @return
* vector<bool> Same size as ops. Indicates whether an op should be run.
*/
std::vector<bool> Prune(const ProgramDesc& pdesc, int block_id);
} // namespace framework
} // namespace paddle
......@@ -13,37 +13,45 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "glog/logging.h"
#include "paddle/framework/feed_fetch_type.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/variable.h"
namespace paddle {
namespace framework {
template <typename T>
void SetFeedVariable(const LoDTensor& input, const std::string& var_name,
size_t index) {
void SetFeedVariable(Scope* scope, const LoDTensor& input,
const std::string& var_name, size_t index) {
// If var_name Variable is not found in GlobalScope, a new variable will
// be created.
Variable* g_feed_value = GetGlobalScope().Var(var_name);
VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
Variable* g_feed_value = scope->Var(var_name);
auto& feed_inputs =
*(g_feed_value->GetMutable<std::vector<paddle::framework::LoDTensor>>());
if (index >= feed_inputs.size()) {
feed_inputs.resize(index + 1);
}
// shared data with input tensor
feed_inputs[index].ShareDataWith<T>(input);
feed_inputs[index].ShareDataWith(input);
// set lod
feed_inputs[index].set_lod(input.lod());
}
LoDTensor& GetFetchVariable(const std::string& var_name, size_t index) {
LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name,
size_t index) {
// Since we want to fetch LodTensor from a variable, the variable must
// be created alreadly.
Variable* g_fetch_value = GetGlobalScope().FindVar(var_name);
auto& fetch_outputs =
*(g_fetch_value->GetMutable<std::vector<paddle::framework::LoDTensor>>());
Variable* g_fetch_value = scope.FindVar(var_name);
PADDLE_ENFORCE(g_fetch_value->IsType<FeedFetchList>(),
"Only %s can be invoked by GetFetchVariable",
typeid(FeedFetchList).name());
auto& fetch_outputs = *g_fetch_value->GetMutable<FeedFetchList>();
auto& tensor = fetch_outputs[index];
VLOG(3) << "Fetch " << var_name << " with index " << index
<< " shape= " << tensor.dims();
PADDLE_ENFORCE_LT(index, fetch_outputs.size());
return fetch_outputs[index];
return tensor;
}
} // namespace framework
......
......@@ -55,6 +55,7 @@ message OpDesc {
repeated Var inputs = 1;
repeated Var outputs = 2;
repeated Attr attrs = 4;
optional bool is_target = 5 [ default = false ];
};
// OpProto describes a C++ framework::OperatorBase derived class.
......@@ -67,6 +68,7 @@ message OpProto {
optional bool duplicable = 3 [ default = false ];
optional bool intermediate = 4 [ default = false ];
optional bool dispensable = 5 [ default = false ];
}
// AttrProto describes the C++ type Attribute.
......@@ -111,6 +113,9 @@ message VarDesc {
enum VarType {
LOD_TENSOR = 1;
SELECTED_ROWS = 2;
FEED_MINIBATCH = 3;
FETCH_LIST = 4;
STEP_SCOPES = 5;
}
required string name = 1;
required VarType type = 2;
......
......@@ -13,6 +13,15 @@
limitations under the License. */
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/saver.pb.h"
#include "paddle/memory/memcpy.h"
#include "paddle/memory/memory.h"
#include <stdint.h>
#include <string.h>
#include <algorithm>
#include <iterator>
#include <glog/logging.h>
......@@ -25,31 +34,50 @@ LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) {
for (size_t i = level_begin; i < level_end; i++) {
new_lod.emplace_back(in.at(i));
}
// transform the lowest level to absolute offset.
LoD abs_offset_lod = ToAbsOffset(in);
new_lod.back() = abs_offset_lod[level_end - 1];
return new_lod;
}
LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin,
size_t elem_end) {
// slice the lod.
LoD new_lod;
new_lod.reserve(in.size() - level);
auto start = in.at(level)[elem_begin];
auto end = in.at(level)[elem_end];
for (auto it = in.begin() + level; it != in.end(); it++) {
auto it_begin = std::find(it->begin(), it->end(), start);
auto it_end = std::find(it_begin, it->end(), end);
PADDLE_ENFORCE(it_begin != it->end(), "error in parsing lod info");
PADDLE_ENFORCE(it_end != it->end(), "error in parsing lod info");
new_lod.emplace_back(it_begin, it_end + 1);
// reset offset if tensor is copyed and sliced.
std::transform(new_lod.back().begin(), new_lod.back().end(),
new_lod.back().begin(),
[start](int v) { return v - start; });
PADDLE_ENFORCE_EQ(new_lod.back().front(), 0, "error in slice LoD");
PADDLE_ENFORCE_LT(level, in.size());
PADDLE_ENFORCE_LT(elem_end, in[level].size());
LoD res;
res.resize(in.size() - level);
// copy the first level
res[0].assign(in[level].begin() + elem_begin,
in[level].begin() + elem_end + 1);
for (size_t lvl = 1; lvl < res.size(); lvl++) {
const auto& in_level = in[level + lvl];
const auto& above_level = res[lvl - 1];
auto& out_level = res[lvl];
out_level.assign(in_level.begin() + above_level.front(),
in_level.begin() + above_level.back() + 1);
}
PADDLE_ENFORCE_LE(new_lod.size(), in.size());
return new_lod;
for (size_t lvl = 0; lvl < res.size(); lvl++) {
// to make the first offset equals 0, all the elements minus the first
// element
size_t front = res[lvl].front();
for (auto& ele : res[lvl]) {
ele -= front;
}
}
return res;
}
LoD ToAbsOffset(const LoD& in) {
// the lowest level stores relative offsets
if (in.empty() || in.size() == 1) return in;
LoD result = in;
for (int level = result.size() - 2; level >= 0; level--) {
for (auto& ele : result[level]) {
ele = result[level + 1][ele];
}
}
return result;
}
bool operator==(const LoD& a, const LoD& b) {
......@@ -75,17 +103,16 @@ bool operator==(const LoD& a, const LoD& b) {
size_t LoDTensor::NumElements(size_t level, size_t idx) const {
PADDLE_ENFORCE_LT(level, NumLevels());
PADDLE_ENFORCE_LT(idx, NumElements(level));
// the last level of LoD, just return number of records in Tensor
if (level == NumLevels() - 1) {
return lod_[level][idx + 1] - lod_[level][idx];
}
// high level of LoD, and there is another lower level, return number of
// lower-level elements
auto tmp = SliceInLevel(lod_, level, idx, idx + 1);
PADDLE_ENFORCE_GE(tmp.size(), 2);
// there is a 0 as a placeholder stored in LoD, so the number of elements
// equals lod.size() - 1
return tmp[1].size() - 1;
return lod_[level][idx + 1] - lod_[level][idx];
}
size_t LoDTensor::NumInstancesInElement(size_t level, size_t idx) const {
PADDLE_ENFORCE_LT(level, NumLevels());
PADDLE_ENFORCE_LT(idx, NumElements(level));
auto abs_lod = ToAbsOffset(lod());
size_t begin = abs_lod[level][idx];
size_t end = abs_lod[level][idx + 1];
return end - begin;
}
void LoDTensor::ShrinkLevels(size_t level_begin, size_t level_end) {
......@@ -99,8 +126,150 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin,
PADDLE_ENFORCE_LT(elem_begin, NumElements(level));
PADDLE_ENFORCE_LT(elem_end, NumElements(level) + 1);
auto abs_lod = framework::ToAbsOffset(lod());
auto new_lod = framework::SliceInLevel(lod_, level, elem_begin, elem_end);
lod_ = new_lod;
// slice the underlying tensor
size_t begin = abs_lod[level][elem_begin];
size_t end = abs_lod[level][elem_end];
PADDLE_ENFORCE_LT(begin, end, "Cannot shrink, the result tensor is empty.");
ShareDataWith(Slice(begin, end));
}
std::string LoDTensor::SerializeToString() const {
LoDTensorProto desc;
// set data_type
if (this->type() == typeid(int8_t)) desc.set_data_type(DataType::BOOL);
if (this->type() == typeid(int16_t)) desc.set_data_type(DataType::INT16);
if (this->type() == typeid(int32_t)) desc.set_data_type(DataType::INT32);
if (this->type() == typeid(int64_t)) desc.set_data_type(DataType::INT64);
// FIXME(dzh): there is no fp16 in standard c++
if (this->type() == typeid(float)) // NOLINT
desc.set_data_type(DataType::FP32);
if (this->type() == typeid(double)) // NOLINT
desc.set_data_type(DataType::FP64);
for (int i = 0; i < dims().size(); ++i) {
desc.add_dims(dims()[i]);
}
// set lod information
desc.set_lod_level(this->NumLevels());
for (size_t i = 0; i < this->NumLevels(); ++i) {
LoDInfo* lod = desc.add_levels();
for (size_t j = 0; j < lod_[i].size(); ++j) {
lod->add_level(lod_[i][j]);
}
}
desc.set_version(0);
std::string desc_bytes = desc.SerializeAsString();
// FIXME(dzh) : implement fix chunk size buffer.
size_t DESC_SIZE = desc_bytes.size();
size_t DATA_SIZE = holder_->size() - offset_;
const size_t BUFFER_SIZE = DESC_SIZE + DATA_SIZE + 2 * sizeof(size_t);
char* buffer =
static_cast<char*>(memory::Alloc(platform::CPUPlace(), BUFFER_SIZE));
// format: desc_size data_size, desc_bytes, data_bytes.
platform::CPUPlace src_place;
platform::CPUPlace dst_place;
memory::Copy(dst_place, buffer, src_place, &BUFFER_SIZE, sizeof(size_t));
memory::Copy(dst_place, buffer + sizeof(size_t), src_place, &DESC_SIZE,
sizeof(size_t));
memory::Copy(dst_place, buffer + sizeof(size_t) * 2, src_place,
desc_bytes.c_str(), desc_bytes.size());
PADDLE_ENFORCE(this->numel() != 0, "Serialize a empty Tensor!");
platform::Place place = holder_->place();
int element_width = holder_->size() / this->numel();
if (platform::is_cpu_place(place)) {
memory::Copy(dst_place, buffer + sizeof(size_t) * 2 + desc_bytes.size(),
boost::get<platform::CPUPlace>(place),
static_cast<char*>(holder_->ptr()) + offset_ / element_width,
DATA_SIZE);
}
#ifdef PADDLE_WITH_GPU
if (platform::is_gpu_place(place)) {
memory::Copy(dst_place, buffer + sizeof(size_t) * 2 + desc_bytes.size(),
boost::get<platform::GPUPlace>(place),
static_cast<char*>(holder_->ptr()) + offset_ / element_width,
DATA_SIZE);
}
#endif
std::string ret(buffer, BUFFER_SIZE);
memory::Free(platform::CPUPlace(), buffer);
return ret;
}
void LoDTensor::DeserializeFromString(const std::string& s,
const platform::Place& dst_place) {
size_t DESC_SIZE, BUFFER_SIZE;
platform::CPUPlace src_place;
memory::Copy(src_place, &BUFFER_SIZE, src_place, s.c_str(), sizeof(size_t));
memory::Copy(src_place, &DESC_SIZE, src_place, s.c_str() + sizeof(size_t),
sizeof(size_t));
const size_t DATA_SIZE = BUFFER_SIZE - DESC_SIZE - sizeof(size_t) * 2;
// parse LoDTensorDesc
LoDTensorProto desc;
desc.ParseFromArray(s.c_str() + sizeof(size_t) * 2, DESC_SIZE);
std::vector<int64_t> dims;
std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
this->Resize(make_ddim(dims));
// parse data type
void* ptr = nullptr;
if (desc.data_type() == DataType::BOOL)
ptr = this->mutable_data<bool>(dst_place);
if (desc.data_type() == DataType::INT16)
ptr = this->mutable_data<int16_t>(dst_place);
if (desc.data_type() == DataType::INT32)
ptr = this->mutable_data<int32_t>(dst_place);
if (desc.data_type() == DataType::INT64)
ptr = this->mutable_data<int64_t>(dst_place);
// FIXME(dzh): there is no fp16 in standard c++
if (desc.data_type() == DataType::FP32)
ptr = this->mutable_data<float>(dst_place);
if (desc.data_type() == DataType::FP64)
ptr = this->mutable_data<double>(dst_place);
LoD lod;
std::vector<size_t> levels;
for (int i = 0; i < desc.levels().size(); ++i) {
auto current_level = desc.levels()[i].level();
std::copy(current_level.begin(), current_level.end(),
std::back_inserter(levels));
lod.emplace_back(levels);
levels.clear();
}
this->set_lod(lod);
if (platform::is_cpu_place(dst_place)) {
memory::Copy(boost::get<platform::CPUPlace>(dst_place), ptr, src_place,
s.c_str() + sizeof(size_t) * 2 + DESC_SIZE, DATA_SIZE);
}
#ifdef PADDLE_WITH_GPU
if (platform::is_gpu_place(dst_place)) {
memory::Copy(boost::get<platform::GPUPlace>(dst_place), ptr, src_place,
s.c_str() + sizeof(size_t) * 2 + DESC_SIZE, DATA_SIZE);
}
#endif
}
} // namespace framework
......
......@@ -25,6 +25,7 @@
#include "paddle/framework/ddim.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/place.h"
namespace paddle {
namespace framework {
......@@ -39,23 +40,36 @@ using Vector = thrust::host_vector<
#endif
/*
* 3-level LoD stores
* LoD is short for Level of Details.
*
* 0 10 20
* 0 5 10 15 20
* 0 2 5 7 10 12 15 20
*
* - in a level, each element indicates offset in the underlying Tensor
* - in a level, each element indicates relative offset of the lower level
* - the first element should be 0 and that indicates that this sequence start
* from 0
* - each sequence's begin and end(no-inclusive) is level[id, id+1]
*
* For example:
* 3-level LoD stores
*
* 0 2 3
* 0 2 4 7
* 0 2 5 7 10 12 15 20
*/
using LoD = std::vector<Vector<size_t>>;
/*
* Slice levels from a LoD.
* NOTE the lowest level should always be the absolute offsets of the underlying
* tensor instances. So if higher layers are sliced without the lowest level,
* the lower level of the sliced LoD will be transformed to the absolute offset.
*/
LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end);
LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin,
size_t elem_end);
/*
* Transform an LoD from relative offsets to absolute offsets.
*/
LoD ToAbsOffset(const LoD& in);
bool operator==(const LoD& a, const LoD& b);
......@@ -74,12 +88,12 @@ class LoDTensor : public Tensor {
LoD lod() const { return lod_; }
/*
* Get a element from LoD.
* Get the start offset and end offset of an element from LoD.
*/
size_t lod_element(size_t level, size_t elem) const {
std::pair<size_t, size_t> lod_element(size_t level, size_t elem) const {
PADDLE_ENFORCE_LT(level, NumLevels());
PADDLE_ENFORCE_LT(elem, NumElements(level));
return (lod_)[level][elem];
return std::make_pair((lod_)[level][elem], (lod_)[level][elem + 1]);
}
/*
......@@ -108,6 +122,12 @@ class LoDTensor : public Tensor {
*/
size_t NumElements(size_t level, size_t idx) const;
/*
* Get the number of instances in the underlying tensor in the `idx`-th
* element.
*/
size_t NumInstancesInElement(size_t level, size_t idx) const;
/*
* Shrink levels[level_begin:level_end]
*/
......@@ -119,8 +139,66 @@ class LoDTensor : public Tensor {
*/
void ShrinkInLevel(size_t level, size_t elem_begin, size_t elem_end);
/**
* @brief Serialize tensor to char bytes.
* Please check model_format.md for the format detail.
* NOTE: GPUTensor will copy data to cpu implicitly.
* @return return string
*/
// FIXME(dzh) : Currently, this interface should only be used in
// save/restore model and checkpoint. ParameterServer do not use shape
// information to do the optimization, as a result, when we serialize
// parameter/gradient to string, we should serialize the tensor
// to string in the ps trainer instead of LoDTensor.
std::string SerializeToString() const;
/**
* @brief Deserialize char bytes to tensor.
* @return return string
*/
void DeserializeFromString(const std::string& s,
const platform::Place& dst_place);
private:
LoD lod_;
};
/*
* Expand the `source` to fit the LoD of `lod`. For example, a `source`
* LoDTensor is
* - LoD: [0, 2]
* - tensor: [a0, a1]
* a `lod` is
* - LoD: [0 3 5]
* returns a new LoDTensor
* - [a0 a0 a0 a1 a1]
*/
template <typename T>
LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level,
const platform::Place& place) {
LoD abs_lod = ToAbsOffset(lod);
const auto& lod_level = lod[level];
size_t num_instances = source.dims()[0];
// new tensor
LoDTensor tensor;
tensor.set_lod(lod);
auto dims = source.dims();
dims[0] = lod_level.back();
tensor.Resize(dims);
tensor.mutable_data<T>(place);
PADDLE_ENFORCE_EQ(num_instances, lod_level.size() - 1);
for (size_t ins = 0; ins < num_instances; ins++) {
for (size_t elem = lod_level[ins]; elem < lod_level[ins + 1]; elem++) {
tensor.Slice(elem, elem + 1)
.CopyFrom(source.Slice(ins, ins + 1), platform::CPUPlace(),
platform::CPUDeviceContext());
}
}
return tensor;
}
} // namespace framework
} // namespace paddle
......@@ -17,10 +17,13 @@
#include <gtest/gtest.h>
#include <algorithm>
#include <memory>
#include <vector>
namespace paddle {
namespace framework {
const int kLodTensorSize = 20 * 128;
class LoDTensorTester : public ::testing::Test {
public:
virtual void SetUp() override {
......@@ -30,15 +33,18 @@ class LoDTensorTester : public ::testing::Test {
// 0 5 10 15 20
// 0 2 5 7 10 12 15 20
LoD lod;
lod.push_back(std::vector<size_t>{0, 10, 20});
lod.push_back(std::vector<size_t>{0, 5, 10, 15, 20});
lod.push_back(std::vector<size_t>{0, 2, 3});
lod.push_back(std::vector<size_t>{0, 2, 5, 8});
lod.push_back(std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20});
ASSERT_EQ(lod.size(), 3UL);
lod_tensor_.Resize({20 /*batch size*/, 128 /*dim*/});
// malloc memory
lod_tensor_.mutable_data<float>(place);
float* dst_ptr = lod_tensor_.mutable_data<float>(place);
for (int i = 0; i < kLodTensorSize; ++i) {
dst_ptr[i] = i;
}
lod_tensor_.set_lod(lod);
}
......@@ -52,14 +58,14 @@ TEST_F(LoDTensorTester, NumLevels) { ASSERT_EQ(lod_tensor_.NumLevels(), 3UL); }
TEST_F(LoDTensorTester, NumElements) {
ASSERT_EQ(lod_tensor_.NumElements(0), 2UL);
ASSERT_EQ(lod_tensor_.NumElements(1), 4UL);
ASSERT_EQ(lod_tensor_.NumElements(1), 3UL);
ASSERT_EQ(lod_tensor_.NumElements(2), 8UL);
}
TEST_F(LoDTensorTester, NumElements2) {
ASSERT_EQ(lod_tensor_.NumElements(0, 0), 2UL);
ASSERT_EQ(lod_tensor_.NumElements(0, 1), 2UL);
ASSERT_EQ(lod_tensor_.NumElements(1, 1), 2UL);
ASSERT_EQ(lod_tensor_.NumElements(0, 1), 1UL);
ASSERT_EQ(lod_tensor_.NumElements(1, 1), 3UL);
}
TEST_F(LoDTensorTester, ShrinkLevels) {
......@@ -68,17 +74,16 @@ TEST_F(LoDTensorTester, ShrinkLevels) {
LoDTensor new_lod_tensor = lod_tensor_;
new_lod_tensor.ShrinkLevels(level, level + 1);
ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor_.NumElements(level));
ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
}
// shrink 2 level
for (size_t level = 0; level < 2UL; ++level) {
LoDTensor new_lod_tensor = lod_tensor_;
new_lod_tensor.ShrinkLevels(level, level + 2);
// the lowest level's last element should be the tensor's batch_size.
ASSERT_EQ(new_lod_tensor.lod().back().back(),
lod_tensor_.lod().back().back());
ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor_.NumElements(level));
ASSERT_EQ(new_lod_tensor.NumElements(1),
lod_tensor_.NumElements(level + 1));
ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
}
}
......@@ -86,20 +91,73 @@ TEST_F(LoDTensorTester, ShrinkLevels) {
TEST_F(LoDTensorTester, ShrinkInLevel) {
size_t level = 0;
LoDTensor new_lod_tensor = lod_tensor_;
new_lod_tensor.ShrinkInLevel(level, 0, 2);
EXPECT_EQ(new_lod_tensor.NumLevels(), 3UL);
EXPECT_EQ(new_lod_tensor.NumElements(0), 2UL);
EXPECT_EQ(new_lod_tensor.NumElements(1), 4UL);
EXPECT_EQ(new_lod_tensor.NumElements(2), 8UL);
ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
new_lod_tensor.ShrinkInLevel(level, 0, 1);
ASSERT_EQ(new_lod_tensor.NumLevels(), 3UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), 1UL);
ASSERT_EQ(new_lod_tensor.NumElements(1), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(2), 5UL);
ASSERT_EQ(new_lod_tensor.dims()[0], 12);
for (int i = 0; i < 12 * 128; i++) {
ASSERT_EQ(new_lod_tensor.data<float>()[i], i);
}
level = 1;
new_lod_tensor = lod_tensor_;
new_lod_tensor.ShrinkInLevel(level, 0, 2);
new_lod_tensor.ShrinkInLevel(level, 1, 2);
ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL);
ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL);
ASSERT_EQ(new_lod_tensor.data<float>(), lod_tensor_.data<float>());
ASSERT_EQ(new_lod_tensor.NumElements(0), 1UL);
ASSERT_EQ(new_lod_tensor.NumElements(1), 3UL);
ASSERT_EQ(new_lod_tensor.dims()[0], 7);
for (int i = 5 * 128; i < 12 * 128; i++) {
ASSERT_EQ(new_lod_tensor.data<float>()[i - 5 * 128], i);
}
LoDTensor t1;
t1.set_lod(lod_tensor_.lod());
t1.ShareDataWith(lod_tensor_);
LoDTensor t2;
t2.set_lod(lod_tensor_.lod());
t2.ShareDataWith(lod_tensor_);
t1.ShrinkInLevel(0, 1, 2);
t2.ShrinkInLevel(0, 0, 1);
EXPECT_NE(t1.data<float>(), t2.data<float>());
EXPECT_NE(t1.data<float>(), lod_tensor_.data<float>());
}
TEST(LodExpand, test) {
LoD lod{{0, 2}};
LoDTensor tensor;
tensor.set_lod(lod);
tensor.Resize({2, 1});
tensor.mutable_data<float>(platform::CPUPlace());
tensor.data<float>()[0] = 0;
tensor.data<float>()[1] = 1;
LoD target;
target.emplace_back(std::vector<size_t>{0, 3, 5});
auto new_tensor = LodExpand<float>(tensor, target, 0UL, platform::CPUPlace());
std::vector<int> result{{0, 0, 0, 1, 1}};
for (size_t i = 0; i < 5; i++) {
ASSERT_EQ(new_tensor.data<float>()[i], result[i]);
}
}
TEST_F(LoDTensorTester, SerializeDeserialize) {
LoDTensor new_lod_tensor = lod_tensor_;
float* src_ptr = lod_tensor_.data<float>();
std::string s = lod_tensor_.SerializeToString();
LoDTensor dst;
dst.DeserializeFromString(s, platform::CPUPlace());
float* dst_ptr = dst.data<float>();
for (int i = 0; i < kLodTensorSize; ++i) {
EXPECT_EQ(dst_ptr[i], src_ptr[i]);
}
ASSERT_EQ(dst.NumElements(0), 2UL);
ASSERT_EQ(dst.NumElements(1), 3UL);
ASSERT_EQ(dst.NumElements(2), 8UL);
}
} // namespace framework
......
......@@ -36,8 +36,8 @@ TEST(LoDTensor, LoDInGPU) {
lod_tensor.mutable_data<float>(place);
lod_tensor.set_lod(src_lod);
CHECK_EQ(lod_tensor.lod_element(0, 2), 4UL);
CHECK_EQ(lod_tensor.lod_element(0, 4), 8UL);
CHECK_EQ(lod_tensor.lod_element(0, 2).first, 4UL);
CHECK_EQ(lod_tensor.lod_element(0, 4).first, 8UL);
auto lod = lod_tensor.lod();
......@@ -48,3 +48,30 @@ TEST(LoDTensor, LoDInGPU) {
CHECK_EQ(lod[0].data()[i], src_lod[0].data()[i] * 2);
}
}
TEST(LoDTensor, SerializeDeserialize) {
paddle::framework::LoDTensor lod_tensor;
paddle::platform::GPUPlace place(0);
paddle::framework::LoD src_lod;
src_lod.push_back(std::vector<size_t>{0, 2, 4, 6, 8, 10, 12, 14});
lod_tensor.Resize({14, 16});
lod_tensor.mutable_data<float>(place);
lod_tensor.set_lod(src_lod);
CHECK_EQ(lod_tensor.lod_element(0, 2).first, 4UL);
CHECK_EQ(lod_tensor.lod_element(0, 4).first, 8UL);
test<<<1, 8>>>(src_lod[0].data(), src_lod[0].size());
cudaDeviceSynchronize();
std::string s = lod_tensor.SerializeToString();
paddle::framework::LoDTensor dst;
dst.DeserializeFromString(s, place);
paddle::framework::LoD dst_lod = dst.lod();
for (size_t i = 0; i < dst_lod[0].size(); ++i) {
CHECK_EQ(src_lod[0].data()[i], dst_lod[0].data()[i] * 2);
}
}
......@@ -87,11 +87,8 @@ class OpInfoMap {
}
}
template <typename Callback>
void IterAllInfo(Callback callback) {
for (auto& it : map_) {
callback(it.first, it.second);
}
const std::unordered_map<std::string, const OpInfo>& map() const {
return map_;
}
private:
......
......@@ -44,6 +44,11 @@ class OpProtoAndCheckerMaker {
var_->set_intermediate(true);
return *this;
}
VariableBuilder& AsDispensable() {
var_->set_dispensable(true);
return *this;
}
};
VariableBuilder AddInput(const std::string& name, const std::string& comment);
......
......@@ -43,12 +43,13 @@ static VariableNameMap ConvertOpDescVarsToVarNameMap(
return ret_val;
}
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDesc& op_desc) {
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDesc& op_desc,
ProgramDesc* program) {
VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
VariableNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs());
AttributeMap attrs;
for (auto& attr : op_desc.attrs()) {
attrs[attr.name()] = GetAttrValue(attr);
attrs[attr.name()] = GetAttrValue(attr, program);
}
return CreateOp(op_desc.type(), inputs, outputs, attrs);
......
......@@ -20,6 +20,8 @@ limitations under the License. */
#include <typeinfo>
#include <unordered_map>
#include <unordered_set>
#include "glog/logging.h" // For VLOG()
#include "paddle/framework/attribute.h"
#include "paddle/framework/details/op_registry.h"
#include "paddle/framework/framework.pb.h"
......@@ -45,18 +47,15 @@ class Registrar {
template <typename... ARGS>
struct OperatorRegistrar : public Registrar {
explicit OperatorRegistrar(const char* op_type) : op_type(op_type) {
explicit OperatorRegistrar(const char* op_type) {
PADDLE_ENFORCE(!OpInfoMap::Instance().Has(op_type),
"'%s' is registered more than once.", op_type);
static_assert(sizeof...(ARGS) != 0,
"OperatorRegistrar should be invoked at least by OpClass");
OpInfo info;
details::OperatorRegistrarRecursive<0, false, ARGS...>(op_type, &info);
OpInfoMap::Instance().Insert(op_type, info);
}
const char* op_type;
OpInfo info;
};
class OpRegistry {
......@@ -77,7 +76,8 @@ class OpRegistry {
const VariableNameMap& outputs,
AttributeMap attrs);
static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc);
static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc,
ProgramDesc* program);
static std::unique_ptr<OperatorBase> CreateOp(const OpDescBind& op_desc);
};
......
......@@ -74,7 +74,7 @@ TEST(OpRegistry, CreateOp) {
attr->set_type(paddle::framework::AttrType::FLOAT);
attr->set_f(scale);
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
auto op = paddle::framework::OpRegistry::CreateOp(op_desc, nullptr);
paddle::framework::Scope scope;
paddle::platform::CPUDeviceContext dev_ctx;
op->Run(scope, dev_ctx);
......@@ -95,7 +95,7 @@ TEST(OpRegistry, IllegalAttr) {
bool caught = false;
try {
paddle::framework::OpRegistry::CreateOp(op_desc);
paddle::framework::OpRegistry::CreateOp(op_desc, nullptr);
} catch (paddle::platform::EnforceNotMet err) {
caught = true;
std::string msg = "larger_than check fail";
......@@ -115,7 +115,7 @@ TEST(OpRegistry, DefaultValue) {
ASSERT_TRUE(op_desc.IsInitialized());
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
auto op = paddle::framework::OpRegistry::CreateOp(op_desc, nullptr);
paddle::framework::Scope scope;
paddle::platform::CPUDeviceContext dev_ctx;
op->Run(scope, dev_ctx);
......@@ -131,7 +131,7 @@ TEST(OpRegistry, CustomChecker) {
// attr 'test_attr' is not set
bool caught = false;
try {
paddle::framework::OpRegistry::CreateOp(op_desc);
paddle::framework::OpRegistry::CreateOp(op_desc, nullptr);
} catch (paddle::platform::EnforceNotMet err) {
caught = true;
std::string msg = "Attribute 'test_attr' is required!";
......@@ -149,7 +149,7 @@ TEST(OpRegistry, CustomChecker) {
attr->set_i(3);
caught = false;
try {
paddle::framework::OpRegistry::CreateOp(op_desc);
paddle::framework::OpRegistry::CreateOp(op_desc, nullptr);
} catch (paddle::platform::EnforceNotMet err) {
caught = true;
std::string msg = "'test_attr' must be even!";
......@@ -166,7 +166,7 @@ TEST(OpRegistry, CustomChecker) {
attr->set_name("test_attr");
attr->set_type(paddle::framework::AttrType::INT);
attr->set_i(4);
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
auto op = paddle::framework::OpRegistry::CreateOp(op_desc, nullptr);
paddle::platform::CPUDeviceContext dev_ctx;
paddle::framework::Scope scope;
op->Run(scope, dev_ctx);
......
......@@ -252,5 +252,20 @@ std::ostream& operator<<(std::ostream& os,
return os;
}
bool OpSupportGPU(const std::string& op_type) {
auto& all_kernels = OperatorWithKernel::AllOpKernels();
auto it = all_kernels.find(op_type);
if (it == all_kernels.end()) {
// All control operator must support GPU
return true;
}
for (auto& kern_pair : it->second) {
if (platform::is_gpu_place(kern_pair.first.place_)) {
return true;
}
}
return false;
}
} // namespace framework
} // namespace paddle
......@@ -20,12 +20,13 @@ limitations under the License. */
#include <unordered_map>
#include <vector>
#include "op_info.h"
#include "glog/logging.h" // For VLOG
#include "paddle/framework/attribute.h"
#include "paddle/framework/block_desc.h"
#include "paddle/framework/data_type.h"
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_info.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/shape_inference.h"
#include "paddle/framework/tensor.h"
......@@ -326,37 +327,47 @@ class CompileTimeInferShapeContext : public InferShapeContext {
bool HasInput(const std::string& name) const override {
const std::vector<std::string>& input_names = op_.Input(name);
auto length = input_names.size();
if (length == 0) {
return false;
}
PADDLE_ENFORCE_EQ(length, 1UL,
"Input(%s) should have only one value, "
"but it have %d now",
name, length);
return block_.HasVar(input_names[0]);
return block_.HasVarRecursive(input_names[0]);
}
bool HasOutput(const std::string& name) const override {
const std::vector<std::string>& output_names = op_.Output(name);
auto length = output_names.size();
if (length == 0) {
return false;
}
PADDLE_ENFORCE_EQ(length, 1UL,
"Output(%s) should have only one value, "
"but it have %d now",
name, length);
return block_.HasVar(output_names[0]);
return block_.HasVarRecursive(output_names[0]);
}
bool HasInputs(const std::string& name) const override {
const std::vector<std::string>& input_names = op_.Input(name);
PADDLE_ENFORCE(!input_names.empty(), "Inputs(%s) length is 0", name);
if (input_names.empty()) {
return false;
}
for (auto& input : input_names) {
if (!block_.HasVar(input)) return false;
if (!block_.HasVarRecursive(input)) return false;
}
return true;
}
bool HasOutputs(const std::string& name) const override {
const std::vector<std::string>& output_names = op_.Output(name);
PADDLE_ENFORCE(!output_names.empty(), "Inputs(%s) length is 0", name);
if (output_names.empty()) {
return false;
}
for (auto& output : output_names) {
if (!block_.HasVar(output)) return false;
if (!block_.HasVarRecursive(output)) return false;
}
return true;
}
......@@ -403,11 +414,11 @@ class CompileTimeInferShapeContext : public InferShapeContext {
private:
DDim GetDim(const std::string& name) const override {
return framework::make_ddim(block_.FindVar(name)->Shape());
return framework::make_ddim(block_.FindVarRecursive(name)->Shape());
}
void SetDim(const std::string& name, const DDim& dim) override {
block_.FindVar(name)->SetShape(framework::vectorize(dim));
block_.FindVarRecursive(name)->SetShape(framework::vectorize(dim));
}
const OpDescBind& op_;
......@@ -420,13 +431,27 @@ class RuntimeInferShapeContext : public InferShapeContext {
: op_(op), scope_(scope) {}
bool HasInput(const std::string& name) const override {
auto ipt = op_.Input(name);
auto& ins = Inputs(name);
size_t length = ins.size();
if (length == 0) {
return false;
}
PADDLE_ENFORCE_EQ(length, 1UL, "Input %s should have more than one inputs",
name);
auto ipt = ins[0];
auto* var = ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt);
return var != nullptr;
}
bool HasOutput(const std::string& name) const override {
auto ipt = op_.Output(name);
auto& outs = Outputs(name);
size_t length = outs.size();
if (length == 0) {
return false;
}
PADDLE_ENFORCE_EQ(length, 1UL, "Output %s should have more than one inputs",
name);
auto ipt = outs[0];
auto* var = ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt);
return var != nullptr;
}
......@@ -573,6 +598,7 @@ class OperatorWithKernel : public OperatorBase {
void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const final {
VLOG(3) << "Running operator " << this->Type();
RuntimeInferShapeContext infer_shape_ctx(*this, scope);
this->InferShape(&infer_shape_ctx);
......@@ -647,5 +673,7 @@ class OperatorWithKernel : public OperatorBase {
std::ostream& operator<<(std::ostream& os,
const OperatorWithKernel::OpKernelKey& kernel_key);
extern bool OpSupportGPU(const std::string& op_type);
} // namespace framework
} // namespace paddle
......@@ -83,7 +83,7 @@ TEST(OperatorBase, all) {
paddle::platform::CPUDeviceContext device_context;
paddle::framework::Scope scope;
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
auto op = paddle::framework::OpRegistry::CreateOp(op_desc, nullptr);
scope.Var("OUT1");
ASSERT_EQ(paddle::framework::op_run_num, 0);
op->Run(scope, device_context);
......@@ -208,7 +208,7 @@ TEST(OpKernel, all) {
paddle::platform::CPUDeviceContext cpu_device_context;
paddle::framework::Scope scope;
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
auto op = paddle::framework::OpRegistry::CreateOp(op_desc, nullptr);
ASSERT_EQ(paddle::framework::cpu_kernel_run_num, 0);
op->Run(scope, cpu_device_context);
ASSERT_EQ(paddle::framework::cpu_kernel_run_num, 1);
......@@ -244,7 +244,7 @@ TEST(OpKernel, multi_inputs) {
scope.Var("y0")->GetMutable<Tensor>();
scope.Var("y1")->GetMutable<Tensor>();
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
auto op = paddle::framework::OpRegistry::CreateOp(op_desc, nullptr);
op->Run(scope, cpu_device_context);
}
......
......@@ -18,27 +18,10 @@ limitations under the License. */
namespace paddle {
namespace framework {
using ProgDescMap =
std::unordered_map<ProgramDesc *, std::unique_ptr<ProgramDescBind>>;
static ProgDescMap *g_bind_map = nullptr;
ProgramDescBind &ProgramDescBind::Instance(ProgramDesc *prog) {
if (g_bind_map == nullptr) {
g_bind_map = new ProgDescMap();
}
auto &map = *g_bind_map;
auto &ptr = map[prog];
if (ptr == nullptr) {
ptr.reset(new ProgramDescBind(prog));
}
return *ptr;
}
BlockDescBind *ProgramDescBind::AppendBlock(const BlockDescBind &parent) {
auto *b = prog_->add_blocks();
auto *b = prog_.add_blocks();
b->set_parent_idx(parent.ID());
b->set_idx(prog_->blocks_size() - 1);
b->set_idx(prog_.blocks_size() - 1);
blocks_.emplace_back(new BlockDescBind(this, b));
return blocks_.back().get();
}
......@@ -47,13 +30,22 @@ ProgramDesc *ProgramDescBind::Proto() {
for (auto &block : blocks_) {
block->Flush();
}
return prog_;
return &prog_;
}
ProgramDescBind::ProgramDescBind() {
auto *block = prog_.mutable_blocks()->Add();
block->set_idx(kRootBlockIndex);
block->set_parent_idx(kNoneBlockIndex);
blocks_.emplace_back(new BlockDescBind(this, block));
}
ProgramDescBind::ProgramDescBind(ProgramDesc *prog) {
prog_ = prog;
for (auto &block : *prog->mutable_blocks()) {
blocks_.emplace_back(new BlockDescBind(this, &block));
ProgramDescBind::ProgramDescBind(const ProgramDescBind &o) {
prog_ = o.prog_;
for (int i = 0; i < prog_.blocks_size(); ++i) {
auto *block = prog_.mutable_blocks(i);
blocks_.emplace_back(new BlockDescBind(*o.blocks_[i], block, this));
}
}
} // namespace framework
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#include <memory>
#include <vector>
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/proto_desc.h"
#include "paddle/platform/macros.h"
namespace paddle {
......@@ -26,7 +27,9 @@ class BlockDescBind;
class ProgramDescBind {
public:
static ProgramDescBind &Instance(ProgramDesc *prog);
ProgramDescBind();
ProgramDescBind(const ProgramDescBind &o);
BlockDescBind *AppendBlock(const BlockDescBind &parent);
......@@ -37,14 +40,9 @@ class ProgramDescBind {
ProgramDesc *Proto();
private:
explicit ProgramDescBind(ProgramDesc *prog);
// Not owned
ProgramDesc *prog_;
ProgramDesc prog_;
std::vector<std::unique_ptr<BlockDescBind>> blocks_;
DISABLE_COPY_AND_ASSIGN(ProgramDescBind);
};
} // namespace framework
} // namespace paddle
此差异已折叠。
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
namespace paddle {
namespace framework {
// The Index of first Block in Program. also called root block.
constexpr int kRootBlockIndex = 0;
// The Parent Index of root Block, this block does not exist.
constexpr int kNoneBlockIndex = -1;
} // namespace framework
} // namespace paddle
此差异已折叠。
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/framework.pb.h"
#include "paddle/platform/enforce.h"
namespace paddle {
namespace framework {
void Prune(const ProgramDesc& input, ProgramDesc& output);
} // namespace framework
} // namespace paddle
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -18,6 +18,10 @@ limitations under the License. */
namespace paddle {
namespace framework {
VarDesc::VarType VarDescBind::GetType() const { return desc_.type(); }
void VarDescBind::SetType(VarDesc::VarType type) { desc_.set_type(type); }
void VarDescBind::SetShape(const std::vector<int64_t> &dims) {
VectorToRepeated(dims, mutable_tensor_desc()->mutable_dims());
}
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册