Commit 68483f95 authored by Yu Yang

Merge branch 'develop' of github.com:baidu/Paddle into feature/add_persistable_in_var_desc

...@@ -86,6 +86,14 @@ if(ANDROID OR IOS)
        "Disable MKLDNN when cross-compiling for Android and iOS" FORCE)
    set(WITH_MKLML OFF CACHE STRING
        "Disable MKLML package when cross-compiling for Android and iOS" FORCE)
    # Compile PaddlePaddle mobile inference library
    if (NOT WITH_C_API)
        set(WITH_C_API ON CACHE STRING
            "Always compile the C_API when cross-compiling for Android and iOS" FORCE)
    endif()
    set(MOBILE_INFERENCE ON)
    add_definitions(-DPADDLE_MOBILE_INFERENCE)
endif()
set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
...@@ -160,9 +168,11 @@ endif(USE_NNPACK)

add_subdirectory(proto)

if(NOT MOBILE_INFERENCE)
    # "add_subdirectory(go)" should be placed after the following line,
    # because it depends on paddle/optimizer.
    add_subdirectory(paddle/optimizer)
endif()

# "add_subdirectory(paddle)" and "add_subdirectory(python)" should be
# placed after this block, because they depend on it.
......
...@@ -53,7 +53,8 @@ if(NOT WITH_GPU)
    list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
else()
    add_definitions(-DPADDLE_WITH_CUDA)

    FIND_PACKAGE(CUDA REQUIRED)
    if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
......
...@@ -73,25 +73,43 @@ function(link_paddle_exe TARGET_NAME)
        generate_rdma_links()
    endif()
    if(MOBILE_INFERENCE)
        target_circle_link_libraries(${TARGET_NAME}
            ARCHIVE_START
            paddle_gserver
            paddle_function
            ARCHIVE_END
            paddle_math
            paddle_utils
            paddle_parameter
            paddle_proto
            paddle_cuda
            ${EXTERNAL_LIBS}
            ${CMAKE_THREAD_LIBS_INIT}
            ${CMAKE_DL_LIBS}
            ${RDMA_LD_FLAGS}
            ${RDMA_LIBS})
    else()
        target_circle_link_libraries(${TARGET_NAME}
            ARCHIVE_START
            paddle_gserver
            paddle_function
            ARCHIVE_END
            paddle_pserver
            paddle_trainer_lib
            paddle_network
            paddle_math
            paddle_utils
            paddle_parameter
            paddle_proto
            paddle_cuda
            paddle_optimizer
            ${EXTERNAL_LIBS}
            ${CMAKE_THREAD_LIBS_INIT}
            ${CMAKE_DL_LIBS}
            ${RDMA_LD_FLAGS}
            ${RDMA_LIBS})
    endif()
    if(ANDROID)
        target_link_libraries(${TARGET_NAME} log)
......
...@@ -55,17 +55,23 @@ Let us consolidate the discussion by presenting some examples.

The following C++ program shows how blocks are used with the `if-else` structure:
```c++
namespace pd = paddle;

int x = 10;
int y = 1;
int z = 10;
bool cond = false;
int o1, o2;
if (cond) {
  int z = x + y;
  o1 = z;
  o2 = pd::layer::softmax(z);
} else {
  int d = pd::layer::fc(z);
  o1 = d;
  o2 = d+1;
}
```
An equivalent PaddlePaddle program from the design doc of the [IfElseOp operator](./if_else_op.md) is as follows:
...@@ -73,57 +79,55 @@ An equivalent PaddlePaddle program from the design doc of the [IfElseOp operator
```python
import paddle as pd

x = minibatch([10, 20, 30])  # shape=[None, 1]
y = var(1)                   # shape=[1], value=1
z = minibatch([10, 20, 30])  # shape=[None, 1]
cond = larger_than(x, 15)    # [false, true, true]

ie = pd.ifelse()
with ie.true_block():
    d = pd.layer.add_scalar(x, y)
    ie.output(d, pd.layer.softmax(d))
with ie.false_block():
    d = pd.layer.fc(z)
    ie.output(d, d+1)
o1, o2 = ie(cond)
```
In both examples, the true branch computes `x+y` and `softmax(x+y)`, while the false branch computes `fc(z)` and `fc(z)+1`.

A difference is that variables in the C++ program contain scalar values, whereas those in the PaddlePaddle programs are mini-batches of instances. The `ie.input(true, 0)` invocation returns the instances in the 0-th input, `x`, that correspond to true values in `cond`, as the local variable `x`, whereas `ie.input(false, 0)` returns the instances corresponding to false values.
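To make the split semantics concrete, the following small NumPy sketch (illustrative only, not PaddlePaddle API) shows how a boolean `cond` vector partitions a mini-batch into the instances fed to the true and false blocks:

```python
import numpy as np

x = np.array([[10], [20], [30]])   # a mini-batch of three instances, shape=[3, 1]
cond = (x > 15).reshape(-1)        # [False, True, True]

true_input = x[cond]               # instances routed to the true block: [[20], [30]]
false_input = x[~cond]             # instances routed to the false block: [[10]]
```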
### Blocks with `for` and `RNNOp`

The following RNN model from the [RNN design doc](./rnn.md)
```python
x = sequence([10, 20, 30])  # shape=[None, 1]
m = var(0)                  # shape=[1]
W = var(0.314, param=true)  # shape=[1]
U = var(0.375, param=true)  # shape=[1]

rnn = pd.rnn()
with rnn.step():
    h = rnn.memory(init = m)
    hh = rnn.previous_memory(h)
    a = layer.fc(W, x)
    b = layer.fc(U, hh)
    s = pd.add(a, b)
    act = pd.sigmoid(s)
    rnn.update_memory(h, act)
    rnn.output(a, b)
o1, o2 = rnn()
```
has its equivalent C++ program as follows
```c++
int x[] = {10, 20, 30};
float m = 0;
float W = 0.314;
float U = 0.375;

float mem[sizeof(x) / sizeof(x[0]) + 1];
float o1[sizeof(x) / sizeof(x[0]) + 1];
float o2[sizeof(x) / sizeof(x[0]) + 1];
for (int i = 1; i <= sizeof(x)/sizeof(x[0]); ++i) {
  if (i == 1) mem[0] = m;
  float a = W * x[i-1];
  float b = U * mem[i-1];
  float s = a + b;
  float act = sigmoid(s);
  mem[i] = act;
  o1[i] = act;
  o2[i] = b;
}
```
## Compilation and Execution

Like TensorFlow programs, a PaddlePaddle program is written in Python. The first part describes a neural network as a protobuf message, and the rest executes the message for training or inference.
...@@ -210,11 +210,11 @@ a = pd.Variable(shape=[20, 20])
b = pd.fc(a, params=["fc.w", "fc.b"])

rnn = pd.create_rnn()
with rnn.stepnet():
    x = a.as_step_input()
    # reuse fc's parameter
    fc_without_b = pd.get_variable("fc.w")
    rnn.output(fc_without_b)

out = rnn()
```
......
# The `IfElse` Operator

PaddlePaddle's `IfElse` operator differs from TensorFlow's:

- the TensorFlow version takes a scalar boolean value as the condition, so that the whole mini-batch goes to either the true or the false branch, whereas
- the PaddlePaddle version takes a vector of boolean values as the condition, and instances corresponding to true values go to the true branch while those corresponding to false values go to the false branch.

## Example

The following PaddlePaddle program shows the usage of the IfElse operator:

```python
import paddle as pd

x = minibatch([10, 20, 30])  # shape=[None, 1]
y = var(1)                   # shape=[1], value=1
z = minibatch([10, 20, 30])  # shape=[None, 1]
cond = larger_than(x, 15)    # [false, true, true]

ie = pd.ifelse()
with ie.true_block():
    d = pd.layer.add(x, y)
    ie.output(d, pd.layer.softmax(d))
with ie.false_block():
    d = pd.layer.fc(z)
    ie.output(d, d+1)
o1, o2 = ie(cond)
```

A challenge in implementing the `IfElse` operator is to infer which variables need to be split, or, say, to identify the mini-batch variable and the variables derived from it.

An equivalent C++ program is as follows:

```c++
namespace pd = paddle;

int x = 10;
int y = 1;
int z = 10;
bool cond = false;
int o1, o2;
if (cond) {
  int d = x + y;
  o1 = d;
  o2 = pd::layer::softmax(d);
} else {
  int d = pd::layer::fc(z);
  o1 = d;
  o2 = d+1;
}
```
# Design Doc: PaddlePaddle Programs

## Compile and Execution

A PaddlePaddle program consists of two parts -- the first generates a `ProgramDesc` protobuf message that describes the program, and the second runs this message using a C++ class `Executor`.

A simple example PaddlePaddle program can be found in [graph.md](./graph.md):

```python
x = layer.data("images")
...@@ -13,36 +15,112 @@ optimize(cost)
train(cost, reader=mnist.train())
```

The first five lines of this program generate, or compile, the `ProgramDesc` message; the last line runs it.

## Programs and Blocks

The basic structure of a PaddlePaddle program is some nested blocks, as in a C++ or Java program:
- program: some nested blocks
- [block](./block.md):
- some local variable definitions, and
- a sequence of operators
The concept of a block comes from ordinary programs. For example, the following C++ program has three blocks:
```c++
int main() { // block 0
  int i = 0;
  if (i < 10) { // block 1
    for (int j = 0; j < 10; j++) { // block 2
    }
  }
  return 0;
}
```
The following PaddlePaddle program has three blocks:
```python
import paddle as pd  // block 0

x = minibatch([10, 20, 30])  # shape=[None, 1]
y = var(1)                   # shape=[1], value=1
z = minibatch([10, 20, 30])  # shape=[None, 1]
cond = larger_than(x, 15)    # [false, true, true]

ie = pd.ifelse()
with ie.true_block():  // block 1
    d = pd.layer.add_scalar(x, y)
    ie.output(d, pd.layer.softmax(d))
with ie.false_block():  // block 2
    d = pd.layer.fc(z)
    ie.output(d, d+1)
o1, o2 = ie(cond)
```
## `BlockDesc` and `ProgramDesc`
All protobuf messages are defined in `framework.proto`.
`BlockDesc` is straight-forward -- it includes local variable definitions, `vars`, and a sequence of operators, `ops`.
```protobuf
message BlockDesc {
  required int32 parent = 1;
  repeated VarDesc vars = 2;
  repeated OpDesc ops = 3;
}
```
The parent ID indicates the parent block so that operators in a block can refer to variables defined locally and also those defined in their ancestor blocks.
All hierarchical blocks in a program are flattened and stored in an array. The block ID is the index of the block in this array.
```protobuf
message ProgramDesc {
  repeated BlockDesc blocks = 1;
}
```
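To make the parent-pointer lookup concrete, here is a minimal Python sketch of name resolution over this flattened array; `find_var` is a hypothetical helper, not part of the actual API:

```python
def find_var(program, block_id, name):
    # Resolve `name` starting from block `block_id`, walking parent links.
    while block_id != -1:                        # the root block's parent is -1
        block = program.blocks[block_id]
        if any(v.name == name for v in block.vars):
            return block_id                      # found in this block
        block_id = block.parent                  # climb to the ancestor block
    return None                                  # undefined variable
```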
### Global Block
The global block is the first one in the above array.
## Operators that Use Blocks
In the above example, the operator `IfElseOp` has two blocks -- the true branch and the false branch.
The definition of `OpDesc` shows that an operator could have some attributes:
```protobuf
message OpDesc {
  AttrDesc attrs = 1;
  ...
}
```
and an attribute could be of type block, which is, in fact, a block ID as described above:
```protobuf
message AttrDesc {
  required string name = 1;

  enum AttrType {
    INT = 1,
    STRING = 2,
    ...
    BLOCK = ...
  }
  required AttrType type = 2;

  optional int32 block = 10; // when type == BLOCK
  ...
}
```
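For instance, in the three-block IfElse program above, the IfElseOp sitting in block 0 would carry its two branches as block-typed attributes holding the indices 1 and 2. A hypothetical sketch (the `OpDesc`/`set_block_attr` helpers here are illustrative, not the real API):

```python
op = OpDesc(type="if_else")
op.set_block_attr("true_block", 1)    # index of the true branch in ProgramDesc.blocks
op.set_block_attr("false_block", 2)   # index of the false branch
```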
## InferShape

In the above solution, all blocks are in `ProgramDesc::blocks`; this implicitly assigns a zero-based ID to each block, namely its index in `ProgramDesc::blocks`, so `AttrDesc::block` can simply be an integer block ID. In the three-block program above, for example, the global block has ID 0 and the true and false branches have IDs 1 and 2.
With this design, the InferShape function should take the following parameters:
......
...@@ -15,9 +15,9 @@ Please be aware that these Python classes need to maintain some construction-tim

### Program

A `ProgramDesc` describes a [DL program](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md), which is composed of an array of `BlockDesc`s. The `BlockDesc`s in a `ProgramDesc` can have a tree-like hierarchical structure. However, the `ProgramDesc` only stores a flattened array of `BlockDesc`s. A `BlockDesc` refers to its parent block by its index in the array. For example, operators in the step block of an RNN operator need to be able to access variables in its ancestor blocks.

Whenever we create a block, we need to set its parent block to the current block, hence the Python class `Program` needs to maintain a data member `current_block`.
```python
class Program(objects):
...@@ -81,13 +81,13 @@ class Block(objects):
        self.ops.prepend(Operator(self, ...))
```
`create_parameter` is necessary because parameters are global variables, defined in the global block, but can be created in some sub-blocks. For example, an FC layer in the step block of an RNN operator.

`prepend_operator` is necessary because the constructor of `Parameter` needs to create the initialize (or load) operator of the parameter, and would like to put it in the *preamble* of the global block.

### Operator

The `Operator` class fills in the `OpDesc` message and calls the C++ function `InferShape` to infer the output shapes from the input shapes.
```python
class Operator(object):
...@@ -105,7 +105,7 @@ class Operator(object):
        return self.proto.type()
```
`Operator` creates the `OpDesc` message in C++ space, so that it can call the `InferShape` function, which is in C++.

### Variable

...@@ -128,7 +128,7 @@ class Variable(object):
        self.writer = None
```

Please be aware of `self.writer`, which tracks the operator that creates the variable. It is possible that more than one operator writes a variable, but in Python space each write to a variable is represented by a Variable class instance. This is guaranteed by the fact that **`core.NewVarDesc` must NOT create a new `VarDesc` message if its name already exists in the specified block**.
### Parameter

...@@ -155,7 +155,7 @@ class Parameter(Variable):
                              initialize_op_attrs)
```

When users create a parameter, they can call

```python
program.create_parameter(
......
# Design for TensorArray
This design doc presents the necessity of a new C++ class `TensorArray`.
In addition to the very simple C++ implementation
```c++
class TensorArray {
 public:
  explicit TensorArray(const LoDTensor&);
  explicit TensorArray(size_t size);

 private:
  vector<LoDTensor> values_;
};
```
We also need to expose it to PaddlePaddle's Python API,
because users would want to use it with our very flexible operators such as `WhileLoop`.
An example of an RNN based on dynamic operators is
```python
input = pd.data(...)
num_steps = Var(12)

states = TensorArray(size=num_steps)
step_inputs = TensorArray(unstack_from=input)
step_outputs = TensorArray(size=num_steps)

W = Tensor(...)
U = Tensor(...)
default_state = some_op()

step = Var(1)

wloop = paddle.create_whileloop(loop_vars=[step])
with wloop.frame():
    wloop.break_if(pd.equal(step, num_steps))
    pre_state = states.read(step-1, default_state)
    step_input = step_inputs.read(step)
    state = pd.sigmoid(pd.matmul(U, pre_state) + pd.matmul(W, step_input))
    states.write(step, state)
    step_outputs.write(step, state)  # output state
    step.update(step+1)

output = step_outputs.stack()
```
## Background
Steps are one of the core concepts of RNN. In each time step of an RNN, there are several input segments, states, and output segments; all these components act like arrays. For example, calling `states[step_id]` will get the state of the `step_id`-th time step.
An RNN can be implemented with the following pseudocode
```c++
Array states;
Array input_segments;
Array output_segments;
Parameter W, U;

step = 1
seq_len = 12
while_loop {
  if (step == seq_len) break;
  states[step] = sigmoid(W * states[step-1] + U * input_segments[step]);
  output_segments[step] = states[step];  // take state as output
  step++;
}
```
According to the [RNN roadmap](https://github.com/PaddlePaddle/Paddle/issues/4561), there are several different RNNs that PaddlePaddle will eventually support.
Currently, the basic RNN implementation supported by PaddlePaddle is the `recurrent_op` which takes tensors as input and splits them into `input_segments`.
Since a tensor cannot store variable-length sequences directly, PaddlePaddle implements a tensor with levels of detail (`LoDTensor` for short).
Segmenting the `LoDTensor` is much more complicated than splitting a tensor, which makes it necessary to refactor the `recurrent_op` with `LoDTensor` segmenting support.

As the next step in RNN support, `dynamic_recurrent_op` should be introduced to handle inputs with variable-length sequences.
The implementation is similar to `recurrent_op`.
The key difference is the way **the original input `LoDTensors` and outputs are split to get the `input_segments` and the `output_segments`.**
Though it can't be built over `recurrent_op` or `dynamic_recurrent_op` directly,
the logic behind splitting a tensor or a LoD tensor into `input_segments` remains the same.
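For intuition, a `LoDTensor` holding two sequences of lengths 3 and 2 stores the time steps flattened plus a level of offsets; the following is a sketch of the layout only, not the C++ API:

```python
data = ["a1", "a2", "a3", "b1", "b2"]   # five time steps, flattened
lod = [[0, 3, 5]]                       # sequence i spans data[lod[0][i]:lod[0][i+1]]

seq0 = data[lod[0][0]:lod[0][1]]        # ['a1', 'a2', 'a3']
seq1 = data[lod[0][1]:lod[0][2]]        # ['b1', 'b2']
```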
## Why `TensorArray`
The logic behind splitting the inputs into segments, states and outputs is similar and can be shared in a separate module.

The arrays of `states`, `input_segments` and `output_segments` would be exposed to users when writing a dynamic RNN model similar to the above pseudocode.
So there should be an array-like container, which can store the segments of a tensor or LoD tensor.
**This container can store an array of tensors and provides several methods to split a tensor or a LoD tensor**.
This is where the notion of `TensorArray` comes from.
## Introduce TensorArray to unify all three RNNs
TensorArray as a new concept is borrowed from TensorFlow,
it is meant to be used with dynamic iteration primitives such as `while_loop` and `map_fn`.

This concept can be used to support our new design of dynamic operations, and help to refactor some existing variant-sentence-related layers,
such as `recurrent_op`, `RecurrentGradientMachine`.
In [our design for dynamic RNN](https://github.com/PaddlePaddle/Paddle/pull/4401),
`TensorArray` is used to segment inputs and store states in all time steps.
By providing some methods similar to a C++ array,
the definition of some state-based dynamic models such as RNN can be more natural and highly flexible.
## Dynamic-operations on TensorArray

`TensorArray` will be used directly when defining dynamic models, so the operators listed below should be implemented:

```python
# several helper operators for TensorArray
def tensor_array_stack(ta, tensor):
    '''
    get a tensor array `ta`, return a packed `tensor`.
    '''
    pass

def tensor_array_unstack(tensor, ta):
    '''
    get a `tensor`, unstack it and get a tensor array `ta`.
    '''
    pass

def tensor_array_write(ta, index, tensor, data_shared):
    '''
    get a `tensor` and a scalar tensor `index`, write `tensor` into the
    index-th value of the tensor array `ta`.
    `data_shared` is an attribute that specifies whether to copy or reference the tensors.
    '''
    pass

def tensor_array_read(ta, index, tensor):
    '''
    get a tensor array `ta` and a scalar tensor `index`, read the index-th
    value of `ta` and return it as `tensor`.
    '''
    pass

def tensor_array_size(ta, tensor):
    '''
    get a tensor array `ta`, return its size as the scalar `tensor`.
    '''
    pass
```
It is tedious for users to invoke so many low-level operators directly, so some helper methods should be provided in the Python wrapper to make `TensorArray` easier to use,
for example
```python
class TensorArray:
    def __init__(self, name):
        self.name = name
        self.desc = TensorArrayDesc()

    def stack(self, name=None):
        '''
        Pack the values in a `TensorArray` into a tensor with rank one higher
        than each tensor in `values`.
        `stack` can be used to merge all the time steps for RNN or whileloop.
        @name: str
            the name of the variable to output.
        '''
        tensor = NewVar(name)
        tensor_array_stack(self.name, tensor)
        return tensor

    def unstack(self, input):
        '''
        Unpacks the given dimension of a rank-`R` tensor into rank-`(R-1)` tensors.
        `unstack` can be used to split a tensor into time steps for RNN or whileloop.
        @input: str
            the name of the input tensor
        '''
        tensor_array_unstack(input, self.name)

    def write(self, index, value, data_shared=True):
        '''
        Write value into index of the TensorArray.
        If `data_shared` is set to True, then the index-th value in TensorArray
        will be shared with the tensor passed in.
        @index: str
            name of a scalar tensor
        @value: str
            name of a tensor
        @data_shared: bool
        '''
        tensor_array_write(self.name, index, value, data_shared)

    def read(self, index, output):
        '''
        Read the value at location `index` in the `TensorArray`.
        @index: str
            name of a scalar tensor
        @output:
            name of an output variable
        '''
        tensor_array_read(self.name, index, output)

    def size(self, output):
        '''
        Return the number of values.
        @output: str
            name of a scalar tensor
        '''
        tensor_array_size(self.name, output)
```
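A minimal usage sketch of the wrapper above; per the class's convention every argument is the *name* of a variable, and all names here are placeholders:

```python
ta = TensorArray("states")
ta.unstack("input")             # split variable `input` into per-step values
ta.read("step", "h")            # read the step-th value into variable `h`
ta.write("step", "h_next")      # write the updated state back
all_states = ta.stack("all")    # pack every step into one output tensor
```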
## LoDTensor-related Supports

The `RecurrentGradientMachine` in Paddle serves as a flexible RNN layer; it takes variable-length sequences as input and outputs sequences too.

Since each step of RNN can only take a tensor-represented batch of data as input,
some preprocessing must be applied to the inputs, such as sorting the sentences by length in descending order, then cutting each word and packing the words into new batches.

Such cut-like operations can be embedded into `TensorArray` as general methods called `unpack` and `pack`;
these two operations are similar to `stack` and `unstack` except that they operate on variable-length sequences formatted as a LoD tensor rather than a plain tensor.
Some definitions are like

```python
def unpack(level):
    '''
    Split LodTensor in some `level` and generate batches; if set `sort_by_length`,
    will sort by length.

    Returns:
        - a new `TensorArray`, whose values are LodTensors and represent batches
          of data.
        - an int32 Tensor, which stores the map from the new batch's indices to
          the original LoDTensor
    '''
    pass

def pack(level, indices_map):
    '''
    Recover the original LoD-arranged LoDTensor with the values in a `TensorArray`
    and `level` and `indices_map`.
    '''
    pass
```
With these two methods, an RNN that supports variable-length sentences can be implemented like

```c++
// input is the variable-length data
...@@ -58,16 +269,3 @@ LoDTensor rnn_output = ta.pack(ta, indice_map);
```

The code above shows that by embedding the LoDTensor-related preprocessing operations into `TensorArray`,
the implementation of an RNN that supports variable-length sentences is far more concise than `RecurrentGradientMachine`, because the latter mixes all the code together and is hard to read and extend.
add_subdirectory(cuda)
add_subdirectory(function)
add_subdirectory(utils)
add_subdirectory(math)
add_subdirectory(gserver)
add_subdirectory(parameter)
add_subdirectory(testing)

if(MOBILE_INFERENCE)
    add_subdirectory(capi)
else()
    add_subdirectory(pserver)
    add_subdirectory(trainer)
    add_subdirectory(string)
    add_subdirectory(scripts)

    if(WITH_C_API)
        add_subdirectory(capi)
    endif()

    if(Boost_FOUND)
        add_subdirectory(memory)
        add_subdirectory(platform)
        add_subdirectory(framework)
        add_subdirectory(operators)
        add_subdirectory(pybind)
    endif()

    if(WITH_SWIG_PY)
        add_subdirectory(api)
    endif()
endif()
...@@ -47,7 +47,7 @@ bool isUsingGpu() { return FLAGS_use_gpu; }
void setUseGpu(bool useGpu) { FLAGS_use_gpu = useGpu; }

bool isGpuVersion() {
#ifndef PADDLE_WITH_CUDA
  return false;
#else
  return true;
......
...@@ -37,9 +37,7 @@ set(PADDLE_CAPI_INFER_LIBS
    paddle_cuda
    paddle_function
    paddle_gserver
    paddle_proto)

cc_library(paddle_capi_whole DEPS paddle_capi ${PADDLE_CAPI_INFER_LIBS})
......
...@@ -46,7 +46,7 @@ paddle_error paddle_matrix_set_row(paddle_matrix mat,
  if (rowID >= ptr->mat->getHeight()) return kPD_OUT_OF_RANGE;
  paddle::real* buf = ptr->mat->getRowBuf(rowID);
  size_t width = ptr->mat->getWidth();
#ifdef PADDLE_WITH_CUDA
  hl_memcpy(buf, rowArray, sizeof(paddle::real) * width);
#else
  std::copy(rowArray, rowArray + width, buf);
......
...@@ -4,11 +4,12 @@ add_unittest(capi_test_mats test_Vector.cpp
target_include_directories(capi_test_mats PUBLIC ${PADDLE_CAPI_INC_PATH})
target_link_libraries(capi_test_mats paddle_capi)

if(NOT MOBILE_INFERENCE)
    add_unittest_without_exec(capi_test_gradientMachine test_GradientMachine.cpp)
    target_include_directories(capi_test_gradientMachine PUBLIC
                               ${PADDLE_CAPI_INC_PATH})
    target_link_libraries(capi_test_gradientMachine paddle_capi)
    add_test(NAME capi_test_gradientMachine
             COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/capi_test_gradientMachine
             WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/capi/tests)
endif()
...@@ -23,13 +23,11 @@ cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc
cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)
cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto proto_desc)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope proto_desc)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator)
cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)

py_proto_compile(framework_py_proto SRCS framework.proto)
# Generate an empty __init__.py to make framework_py_proto as a valid python module.
......
...@@ -13,10 +13,13 @@
limitations under the License. */

#include "paddle/framework/backward.h"

#include <deque>
#include <list>
#include <memory>

#include "paddle/framework/block_desc.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h"
#include "paddle/operators/recurrent_op.h"
...@@ -24,6 +27,35 @@

namespace paddle {
namespace framework {
// Create the gradient operator(s) for `op` via the GradOpMaker registered in
// OpInfoMap; if several gradient ops are produced, wrap them into one NetOp.
static inline std::unique_ptr<OperatorBase> CreateGradOp(
    const OperatorBase& op) {
  OpDescBind op_desc;
  op_desc.SetInputMap(op.Inputs());
  op_desc.SetOutputMap(op.Outputs());
  op_desc.SetType(op.Type());
  op_desc.SetAttrMap(op.Attrs());
  auto& info = OpInfoMap::Instance().Get(op.Type());
  auto grad_descs = info.GradOpMaker()(op_desc);
  std::vector<std::unique_ptr<OperatorBase>> grad_ops;
  grad_ops.reserve(grad_descs.size());
  std::transform(grad_descs.begin(), grad_descs.end(),
                 std::back_inserter(grad_ops),
                 [](const std::unique_ptr<OpDescBind>& grad_desc) {
                   return OpRegistry::CreateOp(*grad_desc);
                 });
  PADDLE_ENFORCE(!grad_ops.empty());
  if (grad_ops.size() == 1) {
    return std::move(grad_ops[0]);
  } else {
    auto net_op = new operators::NetOp();
    for (auto& grad_op : grad_ops) {
      net_op->AppendOp(std::move(grad_op));
    }
    net_op->CompleteAddOp();
    return std::unique_ptr<OperatorBase>(net_op);
  }
}
template <typename Map, typename T>
static void ForEachVarName(const Map& names, T callback) {
  for (auto& name : names) {
...@@ -171,7 +203,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
      net->InsertOp(pos.first + 1, std::move(pos.second));
    }
  } else {
    std::unique_ptr<OperatorBase> grad_op(CreateGradOp(forwardOp));

    ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op](
                                          const std::string& grad_input) {
...@@ -240,5 +272,145 @@ std::unique_ptr<OperatorBase> Backward(
  return BackwardRecursive(forwardOp, no_grad_names, uid);
}
// ==================================== //

static bool AllGradInSet(const std::vector<std::string>& names,
                         const std::unordered_set<std::string>& set) {
  for (const std::string& name : names) {
    if (!set.count(GradVarName(name))) {
      return false;
    }
  }
  return true;
}

std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
    const std::unique_ptr<OpDescBind>& op_desc,
    std::unordered_set<std::string>& no_grad_vars) {
  std::vector<std::unique_ptr<OpDescBind>> grad_op_descs;
  // All input gradients of the forward operator do not need to be calculated.
  const std::vector<std::string>& inputs = op_desc->InputArgumentNames();
  if (AllGradInSet(inputs, no_grad_vars)) {
    return grad_op_descs;  // empty vector
  }
  // All output gradients of the forward operator do not need to be calculated.
  const std::vector<std::string>& outputs = op_desc->OutputArgumentNames();
  if (AllGradInSet(outputs, no_grad_vars)) {
    for (const std::string& name : inputs) {
      no_grad_vars.insert(GradVarName(name));
    }
    return grad_op_descs;  // empty vector
  }

  grad_op_descs = OpRegistry::CreateGradOpDescs(*op_desc);

  std::list<std::unique_ptr<OpDescBind>> pending_fill_zeros_ops;
  for (auto& desc : grad_op_descs) {
    for (const std::string& in_name : desc->InputArgumentNames()) {
      if (no_grad_vars.count(in_name)) {
        std::string prefix = in_name.substr(
            0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1);
        std::string new_name = prefix + kZeroVarSuffix;
        desc->Rename(in_name, new_name);
        std::unique_ptr<OpDescBind> fill_zeros_op(new OpDescBind(
            "fill_zeros_like", {{"X", {prefix}}}, {{"Y", {new_name}}}, {}));
        pending_fill_zeros_ops.push_back(std::move(fill_zeros_op));
      }
    }
    for (const std::string& out_name : desc->OutputArgumentNames()) {
      if (no_grad_vars.count(out_name)) {
        desc->Rename(out_name, kEmptyVarName);
      }
    }
  }
  for (auto& p : pending_fill_zeros_ops) {
    grad_op_descs.insert(grad_op_descs.begin(), std::move(p));
  }
  return grad_op_descs;
}
std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
    ProgramDescBind& program_desc, int block_idx,
    std::unordered_set<std::string>& no_grad_vars) {
  BlockDescBind* cur_block = program_desc.Block(block_idx);
  std::deque<std::unique_ptr<OpDescBind>>& op_descs = cur_block->ops_;
  std::unordered_map<std::string, std::vector<size_t>> dup_out_ops;
  size_t grad_desc_idx = 0;
  std::vector<std::unique_ptr<OpDescBind>> backward_descs;
  for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) {
    std::vector<std::unique_ptr<OpDescBind>> op_grads =
        MakeOpGrad(*it, no_grad_vars);

    if ((*it)->Type() == "recurrent") {
      PADDLE_ENFORCE_EQ(
          op_grads.size(), size_t(1),
          "rnn_op's gradient process should contain only one op.");
      int step_block_idx = (*it)->GetBlockAttr("step_block");
      auto backward_block_op_descs =
          MakeBlockBackward(program_desc, step_block_idx, no_grad_vars);
      BlockDescBind* backward_block = program_desc.AppendBlock(*cur_block);
      for (auto& ptr : backward_block_op_descs) {
        backward_block->ops_.push_back(std::move(ptr));
      }
      op_grads[0]->SetBlockAttr("step_block", *backward_block);
    }

    for (const auto& desc : op_grads) {
      for (const std::string& out_name : desc->OutputArgumentNames()) {
        dup_out_ops[out_name].emplace_back(grad_desc_idx);
      }
      ++grad_desc_idx;
    }
    std::transform(
        op_grads.begin(), op_grads.end(), std::back_inserter(backward_descs),
        [](std::unique_ptr<OpDescBind>& ptr) { return std::move(ptr); });
  }
  // Check whether some variables are written more than once
  std::list<std::pair<size_t, std::unique_ptr<OpDescBind>>> pending_sum_ops;
  for (const auto& dup : dup_out_ops) {
    const std::string& out_name = dup.first;
    const std::vector<size_t> dup_op = dup.second;
    if (out_name != kEmptyVarName && dup_op.size() > 1) {
      std::vector<std::string> sum_op_inputs;
      for (size_t i = 0; i < dup_op.size(); ++i) {
        std::string new_name = out_name + "@RENAME@" + std::to_string(i);
        backward_descs[dup_op[i]]->Rename(out_name, new_name);
        sum_op_inputs.emplace_back(new_name);
      }
      std::unique_ptr<OpDescBind> sum_op(new OpDescBind(
          "sum", {{"X", sum_op_inputs}}, {{"Out", {out_name}}}, {}));
      pending_sum_ops.push_back({dup_op.back(), std::move(sum_op)});
    }
  }
  pending_sum_ops.sort(
      [](const std::pair<size_t, std::unique_ptr<OpDescBind>>& a,
         const std::pair<size_t, std::unique_ptr<OpDescBind>>& b) {
        return a.first > b.first;
      });
  for (auto& p : pending_sum_ops) {
    backward_descs.insert(backward_descs.begin() + p.first + 1,
                          std::move(p.second));
  }
  return backward_descs;
}

void AppendBackward(ProgramDescBind& program_desc,
                    const std::unordered_set<std::string>& no_grad_vars) {
  std::unordered_set<std::string> no_grad_var_names;
  no_grad_var_names.reserve(no_grad_vars.size() + 1);
  no_grad_var_names.insert(std::string(kEmptyVarName) + kGradVarSuffix);
  for (auto& name : no_grad_vars) {
    no_grad_var_names.insert(GradVarName(name));
  }
  const int root_block_idx = 0;
  auto backward_op_descs =
      MakeBlockBackward(program_desc, root_block_idx, no_grad_var_names);
  auto& forw_op_descs = program_desc.Block(root_block_idx)->ops_;
  for (auto& ptr : backward_op_descs) {
    forw_op_descs.push_back(std::move(ptr));
  }
}
}  // namespace framework
}  // namespace paddle
...@@ -13,8 +13,11 @@
limitations under the License. */

#pragma once

#include <unordered_set>

#include "paddle/framework/operator.h"
#include "paddle/framework/program_desc.h"

namespace paddle {
namespace framework {
...@@ -23,5 +26,9 @@ namespace framework {
extern std::unique_ptr<OperatorBase> Backward(
    const OperatorBase& forwardOp,
    const std::unordered_set<std::string>& no_grad_vars);

void AppendBackward(ProgramDescBind& program_desc,
                    const std::unordered_set<std::string>& no_grad_vars);

}  // namespace framework
}  // namespace paddle
...@@ -15,30 +15,42 @@

#include "paddle/framework/backward.h"

#include <gtest/gtest.h>

#include "paddle/framework/block_desc.h"
#include "paddle/framework/op_desc.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h"

namespace paddle {
namespace framework {

using DeviceContext = platform::DeviceContext;
class RowWiseAddOpMaker : public OpProtoAndCheckerMaker {
 public:
  RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "Input X of Add");
    AddInput("b", "Bias of Add");
    AddOutput("Out", "Out of Add");
    AddComment("Add Op");
  }
};
class RowWiseAddGradMaker : public SingleGradOpDescMaker {
 public:
  using SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<OpDescBind> Apply() const override {
    auto grad_op = new OpDescBind();
    grad_op->SetInput(GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetOutput(GradVarName("X"), InputGrad("X"));
    grad_op->SetOutput(GradVarName("b"), InputGrad("b"));
    grad_op->SetType("rowwise_add_grad");
    return std::unique_ptr<OpDescBind>(grad_op);
  }
};
class MulOpMaker : public OpProtoAndCheckerMaker {
 public:
  MulOpMaker(OpProto *proto, OpAttrChecker *op_checker)
...@@ -137,10 +149,20 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SumOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "the input tensors of sum operator.").AsDuplicable();
    AddOutput("Out", "the output tensor of sum operator.");
    AddComment("");
  }
};

class MultInOutOpMaker : public OpProtoAndCheckerMaker {
 public:
  MultInOutOpMaker(OpProto *proto, OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "x");
    AddInput("H", "h");
    AddOutput("Y", "y");
    AddOutput("Z", "z");
    AddComment("");
  }
};
...@@ -151,8 +173,9 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker {
namespace f = paddle::framework;
namespace ops = paddle::operators;
using EnforceNotMet = paddle::platform::EnforceNotMet;
REGISTER_OPERATOR(rowwise_add, f::NOP, f::RowWiseAddOpMaker,
                  f::RowWiseAddGradMaker);
REGISTER_OPERATOR(rowwise_add_grad, f::NOP);
REGISTER_OP(mul, f::NOP, f::MulOpMaker, mul_grad, f::NOP);
REGISTER_OP(sigmoid, f::NOP, f::SigmoidOpMaker, sigmoid_grad, f::NOP);
REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NOP, f::NoGradOpMaker);
...@@ -161,17 +184,7 @@ REGISTER_OP(sum, f::NOP, f::SumOpMaker, sum_grad, f::NOP);
REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker);
REGISTER_OP(many_output_op, f::NOP, f::ManyOutputOpMaker, many_output_op_grad,
            f::NOP);
REGISTER_OP(mult_in_out, f::NOP, f::MultInOutOpMaker, mult_in_out_grad, f::NOP);
TEST(Backward, simple_op_grad) {
  auto fwd = f::OpRegistry::CreateOp(
      "rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, {{"Out", {"out"}}}, {});
  ASSERT_NE(fwd, nullptr);
  auto gop = f::OpRegistry::CreateGradOp(*fwd);
  ASSERT_EQ(1UL, gop->Inputs().size());
  ASSERT_EQ("rowwise_add_grad", gop->Type());
  ASSERT_EQ(f::GradVarName("x"), gop->Output(f::GradVarName("X")));
  ASSERT_EQ(f::GradVarName("b"), gop->Output(f::GradVarName("b")));
}
TEST(Backward, simple_op_not_need_grad) {
  auto fwd = f::OpRegistry::CreateOp(
...@@ -289,17 +302,6 @@ TEST(Backward, net_shared_weight) {
  ASSERT_EQ("sum", bwd_net->ops_[2]->Type());
}
TEST(Backward, op_register_grad_not_for_network) {
  auto fwd =
      f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {"b"}}},
                              {{"mul_result", {"mul_out"}},
                               {"add_result", {"add_out"}},
                               {"Out", {"out1"}}},
                              {{"temporary_index", std::vector<int>{0, 1}}});

  ASSERT_THROW(f::OpRegistry::CreateGradOp(*fwd), EnforceNotMet);
}
TEST(Backward, op_all_input_are_not_need) {
  auto fwd = f::OpRegistry::CreateOp(
      "rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, {{"Out", {"out"}}}, {});
...@@ -402,3 +404,293 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
  EXPECT_EQ(bwd_net->ops_[2]->Inputs(all).size(), 0UL);
  EXPECT_EQ(bwd_net->ops_[2]->Outputs(all).size(), 0UL);
}
// =================================== //

f::ProgramDesc *GetNewProgramDesc() {
  auto *program_desc = new f::ProgramDesc();
  auto *root_block = program_desc->add_blocks();
  root_block->set_idx(0);
  root_block->set_parent_idx(-1);
  return program_desc;
}

TEST(Backward, simple_single_op) {
  f::ProgramDesc *program_desc = GetNewProgramDesc();
  f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
  f::BlockDescBind *block = program.Block(0);
  f::OpDescBind *op = block->AppendOp();
  op->SetType("rowwise_add");
  op->SetInput("X", {"x"});
  op->SetInput("b", {"b"});
  op->SetOutput("Out", {"out"});

  AppendBackward(program, {});

  ASSERT_EQ(block->AllOps().size(), 2UL);
  f::OpDescBind *grad_op = block->AllOps()[1];
  EXPECT_EQ(grad_op->Type(), "rowwise_add_grad");
  ASSERT_EQ(grad_op->InputNames().size(), 1UL);
  ASSERT_EQ(grad_op->OutputNames().size(), 2UL);
  EXPECT_EQ(grad_op->Input(f::GradVarName("Out")),
            std::vector<std::string>({f::GradVarName("out")}));
  EXPECT_EQ(grad_op->Output(f::GradVarName("X")),
            std::vector<std::string>({f::GradVarName("x")}));
  EXPECT_EQ(grad_op->Output(f::GradVarName("b")),
            std::vector<std::string>({f::GradVarName("b")}));
}
TEST(Backward, simple_mult_op) {
  f::ProgramDesc *program_desc = GetNewProgramDesc();
  f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
  f::BlockDescBind *block = program.Block(0);
  f::OpDescBind *op1 = block->AppendOp();
  op1->SetType("rowwise_add");
  op1->SetInput("X", {"x1"});
  op1->SetInput("b", {"b1"});
  op1->SetOutput("Out", {"out1"});
  f::OpDescBind *op2 = block->AppendOp();
  op2->SetType("mul");
  op2->SetInput("X", {"out1"});
  op2->SetInput("Y", {"y2"});
  op2->SetOutput("Out", {"out2"});
  f::OpDescBind *op3 = block->AppendOp();
  op3->SetType("rowwise_add");
  op3->SetInput("X", {"out2"});
  op3->SetInput("b", {"b3"});
  op3->SetOutput("Out", {"out3"});

  AppendBackward(program, {});

  ASSERT_EQ(block->AllOps().size(), 6UL);
  f::OpDescBind *grad_op1 = block->AllOps()[5];
  EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad");
  ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
  ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
  EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")),
            std::vector<std::string>({f::GradVarName("out1")}));
  EXPECT_EQ(grad_op1->Output(f::GradVarName("X")),
            std::vector<std::string>({f::GradVarName("x1")}));
  EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
            std::vector<std::string>({f::GradVarName("b1")}));

  f::OpDescBind *grad_op2 = block->AllOps()[4];
  EXPECT_EQ(grad_op2->Type(), "mul_grad");
  ASSERT_EQ(grad_op2->InputNames().size(), 4UL);
  ASSERT_EQ(grad_op2->OutputNames().size(), 2UL);
  EXPECT_EQ(grad_op2->Input("X"), std::vector<std::string>({"out1"}));
  EXPECT_EQ(grad_op2->Input("Y"), std::vector<std::string>({"y2"}));
  EXPECT_EQ(grad_op2->Input("Out"), std::vector<std::string>({"out2"}));
  EXPECT_EQ(grad_op2->Input(f::GradVarName("Out")),
            std::vector<std::string>({f::GradVarName("out2")}));
  EXPECT_EQ(grad_op2->Output(f::GradVarName("X")),
            std::vector<std::string>({f::GradVarName("out1")}));
  EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")),
            std::vector<std::string>({f::GradVarName("y2")}));

  f::OpDescBind *grad_op3 = block->AllOps()[3];
  EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad");
  ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
  ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
  EXPECT_EQ(grad_op3->Input(f::GradVarName("Out")),
            std::vector<std::string>({f::GradVarName("out3")}));
  EXPECT_EQ(grad_op3->Output(f::GradVarName("X")),
            std::vector<std::string>({f::GradVarName("out2")}));
  EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
            std::vector<std::string>({f::GradVarName("b3")}));
}
TEST(Backward, intermedia_var_no_grad) {
  f::ProgramDesc *program_desc = GetNewProgramDesc();
  f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
  f::BlockDescBind *block = program.Block(0);
  f::OpDescBind *op1 = block->AppendOp();
  op1->SetType("rowwise_add");
  op1->SetInput("X", {"x1"});
  op1->SetInput("b", {"b1"});
  op1->SetOutput("Out", {"out1"});
  f::OpDescBind *op2 = block->AppendOp();
  op2->SetType("mul");
  op2->SetInput("X", {"x2"});
  op2->SetInput("Y", {"y2"});
  op2->SetOutput("Out", {"out2"});
  f::OpDescBind *op3 = block->AppendOp();
  op3->SetType("rowwise_add");
  op3->SetInput("X", {"out2"});
  op3->SetInput("b", {"b3"});
  op3->SetOutput("Out", {"out3"});
  f::OpDescBind *op4 = block->AppendOp();
  op4->SetType("mul");
  op4->SetInput("X", {"out1"});
  op4->SetInput("Y", {"out3"});
  op4->SetOutput("Out", {"out4"});

  AppendBackward(program, {"out3"});

  ASSERT_EQ(block->AllOps().size(), 6UL);
  f::OpDescBind *grad_op1 = block->AllOps()[5];
  EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad");
  ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
  ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
  EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")),
            std::vector<std::string>({f::GradVarName("out1")}));
  EXPECT_EQ(grad_op1->Output(f::GradVarName("X")),
            std::vector<std::string>({f::GradVarName("x1")}));
  EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
            std::vector<std::string>({f::GradVarName("b1")}));

  f::OpDescBind *grad_op4 = block->AllOps()[4];
  EXPECT_EQ(grad_op4->Type(), "mul_grad");
  ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
  ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
  EXPECT_EQ(grad_op4->Input("X"), std::vector<std::string>({"out1"}));
  EXPECT_EQ(grad_op4->Input("Y"), std::vector<std::string>({"out3"}));
  EXPECT_EQ(grad_op4->Input("Out"), std::vector<std::string>({"out4"}));
  EXPECT_EQ(grad_op4->Input(f::GradVarName("Out")),
            std::vector<std::string>({f::GradVarName("out4")}));
  EXPECT_EQ(grad_op4->Output(f::GradVarName("X")),
            std::vector<std::string>({f::GradVarName("out1")}));
  EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")),
            std::vector<std::string>({f::kEmptyVarName}));
}
TEST(Backward, var_no_grad) {
f::ProgramDesc *program_desc = GetNewProgramDesc();
f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
f::BlockDescBind *block = program.Block(0);
f::OpDescBind *op1 = block->AppendOp();
op1->SetType("mult_in_out");
op1->SetInput("X", {"x1"});
op1->SetInput("H", {"h1"});
op1->SetOutput("Y", {"y1"});
op1->SetOutput("Z", {"z1"});
f::OpDescBind *op2 = block->AppendOp();
op2->SetType("mult_in_out");
op2->SetInput("X", {"y1"});
op2->SetInput("H", {"z1"});
op2->SetOutput("Y", {"y2"});
op2->SetOutput("Z", {"z2"});
AppendBackward(program, {"z1"});
ASSERT_EQ(block->AllOps().size(), 5UL);
f::OpDescBind *grad_op2 = block->AllOps()[2];
ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad");
ASSERT_EQ(grad_op2->InputNames().size(), 6UL);
ASSERT_EQ(grad_op2->OutputNames().size(), 2UL);
EXPECT_EQ(grad_op2->Input("X"), std::vector<std::string>({"y1"}));
EXPECT_EQ(grad_op2->Input("H"), std::vector<std::string>({"z1"}));
EXPECT_EQ(grad_op2->Input("Y"), std::vector<std::string>({"y2"}));
EXPECT_EQ(grad_op2->Input("Z"), std::vector<std::string>({"z2"}));
EXPECT_EQ(grad_op2->Input(f::GradVarName("Y")),
std::vector<std::string>({f::GradVarName("y2")}));
EXPECT_EQ(grad_op2->Input(f::GradVarName("Z")),
std::vector<std::string>({f::GradVarName("z2")}));
EXPECT_EQ(grad_op2->Output(f::GradVarName("X")),
std::vector<std::string>({f::GradVarName("y1")}));
EXPECT_EQ(grad_op2->Output(f::GradVarName("H")),
std::vector<std::string>({f::kEmptyVarName}));
f::OpDescBind *fill_zero_op = block->AllOps()[3];
ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like");
ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL);
ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL);
EXPECT_EQ(fill_zero_op->Input("X"), std::vector<std::string>({"z1"}));
EXPECT_EQ(fill_zero_op->Output("Y"),
std::vector<std::string>({std::string("z1") + f::kZeroVarSuffix}));
f::OpDescBind *grad_op1 = block->AllOps()[4];
ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 6UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
EXPECT_EQ(grad_op1->Input("X"), std::vector<std::string>({"x1"}));
EXPECT_EQ(grad_op1->Input("H"), std::vector<std::string>({"h1"}));
EXPECT_EQ(grad_op1->Input("Y"), std::vector<std::string>({"y1"}));
EXPECT_EQ(grad_op1->Input("Z"), std::vector<std::string>({"z1"}));
EXPECT_EQ(grad_op1->Input(f::GradVarName("Y")),
std::vector<std::string>({f::GradVarName("y1")}));
EXPECT_EQ(grad_op1->Input(f::GradVarName("Z")),
std::vector<std::string>({std::string("z1") + f::kZeroVarSuffix}));
EXPECT_EQ(grad_op1->Output(f::GradVarName("X")),
std::vector<std::string>({f::GradVarName("x1")}));
EXPECT_EQ(grad_op1->Output(f::GradVarName("H")),
std::vector<std::string>({f::GradVarName("h1")}));
}
TEST(Backward, shared_var) {
f::ProgramDesc *program_desc = GetNewProgramDesc();
f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
f::BlockDescBind *block = program.Block(0);
f::OpDescBind *op1 = block->AppendOp();
op1->SetType("rowwise_add");
op1->SetInput("X", {"x1"});
op1->SetInput("b", {"b1"});
op1->SetOutput("Out", {"out1"});
f::OpDescBind *op2 = block->AppendOp();
op2->SetType("mul");
op2->SetInput("X", {"out1"});
op2->SetInput("Y", {"y2"});
op2->SetOutput("Out", {"out2"});
f::OpDescBind *op3 = block->AppendOp();
op3->SetType("rowwise_add");
op3->SetInput("X", {"out1"});
op3->SetInput("b", {"b3"});
op3->SetOutput("Out", {"out3"});
AppendBackward(program, {});
ASSERT_EQ(block->AllOps().size(), 7UL);
f::OpDescBind *grad_op3 = block->AllOps()[3];
ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
EXPECT_EQ(grad_op3->Input(f::GradVarName("Out")),
std::vector<std::string>({f::GradVarName("out3")}));
EXPECT_EQ(grad_op3->Output(f::GradVarName("X")),
std::vector<std::string>({f::GradVarName("out1") + "@RENAME@0"}));
EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b3")}));
f::OpDescBind *grad_op4 = block->AllOps()[4];
ASSERT_EQ(grad_op4->Type(), "mul_grad");
ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
EXPECT_EQ(grad_op4->Input("X"), std::vector<std::string>({"out1"}));
EXPECT_EQ(grad_op4->Input("Y"), std::vector<std::string>({"y2"}));
EXPECT_EQ(grad_op4->Input("Out"), std::vector<std::string>({"out2"}));
EXPECT_EQ(grad_op4->Input(f::GradVarName("Out")),
std::vector<std::string>({f::GradVarName("out2")}));
EXPECT_EQ(grad_op4->Output(f::GradVarName("X")),
std::vector<std::string>({f::GradVarName("out1") + "@RENAME@1"}));
EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")),
std::vector<std::string>({f::GradVarName("y2")}));
f::OpDescBind *sum_op = block->AllOps()[5];
ASSERT_EQ(sum_op->Type(), "sum");
ASSERT_EQ(sum_op->InputNames().size(), 1UL);
ASSERT_EQ(sum_op->OutputNames().size(), 1UL);
EXPECT_EQ(sum_op->Input("X"),
std::vector<std::string>({f::GradVarName("out1") + "@RENAME@0",
f::GradVarName("out1") + "@RENAME@1"}));
EXPECT_EQ(sum_op->Output("Out"),
std::vector<std::string>({f::GradVarName("out1")}));
f::OpDescBind *grad_op1 = block->AllOps()[6];
ASSERT_EQ(grad_op1->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")),
std::vector<std::string>({f::GradVarName("out1")}));
EXPECT_EQ(grad_op1->Output(f::GradVarName("X")),
std::vector<std::string>({f::GradVarName("x1")}));
EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b1")}));
}
\ No newline at end of file
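The shared_var test above pins down how AppendBackward resolves a forward variable that is consumed by two operators: each consumer's backward op writes into its own renamed gradient slot, and a generated sum op folds the slots back into the real gradient. A comment sketch of the op chain the test asserts, using the test's own names:

// Forward:
//   out1 = rowwise_add(x1, b1)   // out1 is read by both op2 and op3
//   out2 = mul(out1, y2)
//   out3 = rowwise_add(out1, b3)
// Backward ops appended at block indices 3..6:
//   rowwise_add_grad : out3@GRAD -> out1@GRAD@RENAME@0, b3@GRAD
//   mul_grad         : out2@GRAD -> out1@GRAD@RENAME@1, y2@GRAD
//   sum              : {out1@GRAD@RENAME@0, out1@GRAD@RENAME@1} -> out1@GRAD
//   rowwise_add_grad : out1@GRAD -> x1@GRAD, b1@GRAD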
......
@@ -34,6 +34,10 @@ VarDescBind *BlockDescBind::Var(const std::string &name) const {
   return it->second.get();
 }
 
+bool BlockDescBind::HasVar(const std::string &name) const {
+  return vars_.find(name) != vars_.end();
+}
+
 std::vector<VarDescBind *> BlockDescBind::AllVars() const {
   std::vector<VarDescBind *> res;
   for (const auto &p : vars_) {
@@ -70,6 +74,12 @@ void BlockDescBind::Sync() {
     for (auto &op_desc : ops_) {
       op_field.AddAllocated(op_desc->Proto());
     }
+    auto &var_field = *this->desc_->mutable_vars();
+    var_field.Clear();
+    var_field.Reserve(static_cast<int>(vars_.size()));
+    for (auto &var_desc : vars_) {
+      var_field.AddAllocated(var_desc.second->Proto());
+    }
     need_update_ = false;
   }
 }
......
@@ -32,6 +32,14 @@ class ProgramDescBind;
 
 class BlockDescBind {
  public:
+  friend std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
+      ProgramDescBind &program_desc, int block_idx,
+      std::unordered_set<std::string> &no_grad_vars);
+
+  friend void AppendBackward(
+      ProgramDescBind &program_desc,
+      const std::unordered_set<std::string> &no_grad_vars);
+
   BlockDescBind(ProgramDescBind *prog, BlockDesc *desc)
       : prog_(prog), desc_(desc), need_update_(false) {}
 
@@ -43,6 +51,8 @@ class BlockDescBind {
 
   VarDescBind *Var(const std::string &name_bytes) const;
 
+  bool HasVar(const std::string &var_name) const;
+
   std::vector<VarDescBind *> AllVars() const;
 
   BlockDescBind *ParentBlock() const;
......
@@ -66,7 +66,6 @@ message OpProto {
     optional bool duplicable = 3 [ default = false ];
     optional bool intermediate = 4 [ default = false ];
-    optional bool not_in_gradient = 5 [ default = false ];
   }
 
 // AttrProto describes the C++ type Attribute.
@@ -116,4 +115,7 @@ message BlockDesc {
   repeated OpDesc ops = 4;
 }
 
+// Please refer to
+// https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md
+// for more details.
 message ProgramDesc { repeated BlockDesc blocks = 1; }
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
express or implied. See the License for the specific language governing
permissions and limitations under the License. */
#include "paddle/framework/grad_op_builder.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace framework {
enum class OpArgType { IN, OUT };
static void TransOpArg(const OperatorBase* src_op, const OpArgType& src_type,
bool is_grad, VariableNameMap* vars) {
const auto& src_inout =
src_type == OpArgType::IN ? src_op->Inputs() : src_op->Outputs();
auto& dst_inout = *vars;
auto& proto = OpInfoMap::Instance().Get(src_op->Type()).Proto();
const auto& src_arg_list =
src_type == OpArgType::IN ? proto.inputs() : proto.outputs();
for (const auto& arg : src_arg_list) {
if (arg.not_in_gradient() && !is_grad) continue;
const std::string src_name = arg.name();
std::string dst_name = is_grad ? GradVarName(src_name) : src_name;
dst_inout[dst_name].reserve(src_inout.at(src_name).size());
for (auto& var_name : src_inout.at(src_name)) {
std::string s = is_grad ? GradVarName(var_name) : var_name;
dst_inout[dst_name].emplace_back(s);
}
}
}
OperatorBase* BuildGradOp(const OperatorBase* op) {
auto& info = OpInfoMap::Instance().Get(op->Type());
PADDLE_ENFORCE(info.HasGradientOp());
VariableNameMap inputs;
VariableNameMap outputs;
TransOpArg(op, OpArgType::IN, false, &inputs); // I
TransOpArg(op, OpArgType::OUT, false, &inputs); // O
TransOpArg(op, OpArgType::OUT, true, &inputs); // OG
TransOpArg(op, OpArgType::IN, true, &outputs); // IG
auto& grad_info = OpInfoMap::Instance().Get(info.grad_op_type_);
return grad_info.Creator()(info.grad_op_type_, inputs, outputs, op->Attrs());
}
static void TransOpDescArg(const OpDescBind* src_op, const OpArgType& src_type,
bool is_grad, OpDescBind* dst_op,
const OpArgType& dst_type) {
PADDLE_ENFORCE(dst_op != nullptr,
"Protobuf desc of gradient op must be initialized first.");
const auto& proto = OpInfoMap::Instance().Get(src_op->Type()).Proto();
const auto& src_arg_list =
src_type == OpArgType::IN ? proto.inputs() : proto.outputs();
for (const auto& arg : src_arg_list) {
if (arg.not_in_gradient() && !is_grad) continue;
const std::string src_name = arg.name();
std::vector<std::string> vars = src_type == OpArgType::IN
? src_op->Input(src_name)
: src_op->Output(src_name);
if (is_grad) {
for (std::string& var : vars) {
var = GradVarName(var);
}
}
std::string dst_name = is_grad ? GradVarName(src_name) : src_name;
dst_type == OpArgType::IN ? dst_op->SetInput(dst_name, vars)
: dst_op->SetOutput(dst_name, vars);
}
}
void CompleteGradOpDesc(const OpDescBind* forw_op, OpDescBind* grad_op) {
auto& info = OpInfoMap::Instance().Get(forw_op->Type());
PADDLE_ENFORCE(info.HasGradientOp());
grad_op->SetType(info.grad_op_type_);
TransOpDescArg(forw_op, OpArgType::IN, false, grad_op, OpArgType::IN);
TransOpDescArg(forw_op, OpArgType::OUT, false, grad_op, OpArgType::IN);
TransOpDescArg(forw_op, OpArgType::OUT, true, grad_op, OpArgType::IN);
TransOpDescArg(forw_op, OpArgType::IN, true, grad_op, OpArgType::OUT);
grad_op->SetAttrMap(forw_op->GetAttrMap());
}
} // namespace framework
}  // namespace paddle
......
#include "paddle/framework/grad_op_builder.h"
#include <gtest/gtest.h>
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
USE_OP(sum);
namespace paddle {
namespace framework {
class MutiInOutOpMaker : public OpProtoAndCheckerMaker {
public:
MutiInOutOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("In1", "a single input");
AddInput("In2_mult", "a multiple input").AsDuplicable();
AddInput("In3", "another single input");
AddOutput("Out1", "a single output");
AddOutput("Out2_mult", "a multiple output").AsDuplicable();
AddComment("test op with multiple inputs and outputs");
}
};
class IOIgnoredOpMaker : public OpProtoAndCheckerMaker {
public:
IOIgnoredOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("In1", "a single input");
AddInput("In2_mult", "a multiple input").AsDuplicable().NotInGradient();
AddInput("In3_mult", "another multiple input").AsDuplicable();
AddOutput("Out1_mult", "a multiple output").AsDuplicable();
AddOutput("Out2", "a single output").NotInGradient();
AddComment("op with inputs and outputs ignored in gradient calculating");
}
};
} // namespace framework
} // namespace paddle
namespace f = paddle::framework;
REGISTER_OP(mult_io, f::NOP, f::MutiInOutOpMaker, mult_io_grad, f::NOP);
REGISTER_OP(io_ignored, f::NOP, f::IOIgnoredOpMaker, io_ignored_grad, f::NOP);
TEST(GradOpBuilder, MutiInOut) {
std::shared_ptr<f::OperatorBase> test_op(f::OpRegistry::CreateOp(
"mult_io", {{"In1", {"in1"}},
{"In2_mult", {"in2_1", "in2_2", "in2_3"}},
{"In3", {"in3"}}},
{{"Out1", {"out1"}}, {"Out2_mult", {"out2_1", "out2_2"}}}, {}));
std::shared_ptr<f::OperatorBase> grad_test_op =
f::OpRegistry::CreateGradOp(*test_op);
ASSERT_EQ(grad_test_op->Inputs().size(), 3UL + 2UL + 2UL);
EXPECT_EQ(grad_test_op->Input("In1"), "in1");
EXPECT_EQ(grad_test_op->Inputs("In2_mult"),
std::vector<std::string>({"in2_1", "in2_2", "in2_3"}));
EXPECT_EQ(grad_test_op->Input("In3"), "in3");
EXPECT_EQ(grad_test_op->Input("Out1"), "out1");
EXPECT_EQ(grad_test_op->Inputs("Out2_mult"),
std::vector<std::string>({"out2_1", "out2_2"}));
EXPECT_EQ(grad_test_op->Input(f::GradVarName("Out1")),
f::GradVarName("out1"));
EXPECT_EQ(grad_test_op->Inputs(f::GradVarName("Out2_mult")),
std::vector<std::string>(
{f::GradVarName("out2_1"), f::GradVarName("out2_2")}));
ASSERT_EQ(grad_test_op->Outputs().size(), 3UL);
EXPECT_EQ(grad_test_op->Output(f::GradVarName("In1")), f::GradVarName("in1"));
EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In2_mult")),
std::vector<std::string>({f::GradVarName("in2_1"),
f::GradVarName("in2_2"),
f::GradVarName("in2_3")}));
EXPECT_EQ(grad_test_op->Output(f::GradVarName("In3")), f::GradVarName("in3"));
}
TEST(GradOpBuilder, IOIgnoredInGradient) {
std::shared_ptr<f::OperatorBase> test_op(f::OpRegistry::CreateOp(
"io_ignored", {{"In1", {"in1"}},
{"In2_mult", {"in2_1", "in2_2"}},
{"In3_mult", {"in3_1", "in3_2"}}},
{{"Out1_mult", {"out1_1", "out1_2"}}, {"Out2", {"out2"}}}, {}));
std::shared_ptr<f::OperatorBase> grad_test_op =
f::OpRegistry::CreateGradOp(*test_op);
// 'In2' and 'Out2' are ignored in gradient calculating
ASSERT_EQ(grad_test_op->Inputs().size(), 2UL + 1UL + 2UL);
EXPECT_EQ(grad_test_op->Input("In1"), "in1");
EXPECT_EQ(grad_test_op->Inputs("In3_mult"),
std::vector<std::string>({"in3_1", "in3_2"}));
EXPECT_EQ(grad_test_op->Inputs("Out1_mult"),
std::vector<std::string>({"out1_1", "out1_2"}));
EXPECT_EQ(grad_test_op->Inputs(f::GradVarName("Out1_mult")),
std::vector<std::string>(
{f::GradVarName("out1_1"), f::GradVarName("out1_2")}));
EXPECT_EQ(grad_test_op->Input(f::GradVarName("Out2")),
f::GradVarName("out2"));
ASSERT_EQ(grad_test_op->Outputs().size(), 3UL);
EXPECT_EQ(grad_test_op->Output(f::GradVarName("In1")), f::GradVarName("in1"));
EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In2_mult")),
std::vector<std::string>(
{f::GradVarName("in2_1"), f::GradVarName("in2_2")}));
EXPECT_EQ(grad_test_op->Outputs(f::GradVarName("In3_mult")),
std::vector<std::string>(
{f::GradVarName("in3_1"), f::GradVarName("in3_2")}));
}
TEST(GradOpDescBuilder, MutiInOut) {
f::OpDescBind *forw_op = new f::OpDescBind();
forw_op->SetType("mult_io");
forw_op->SetInput("In1", {"in1"});
forw_op->SetInput("In2_mult", {"in2_1", "in2_2", "in2_3"});
forw_op->SetInput("In3", {"in3"});
forw_op->SetOutput("Out1", {"out1"});
forw_op->SetOutput("Out2_mult", {"out2_1", "out2_2"});
f::OpDescBind *grad_op = new f::OpDescBind();
f::CompleteGradOpDesc(forw_op, grad_op);
EXPECT_EQ(grad_op->Type(), "mult_io_grad");
ASSERT_EQ(grad_op->InputNames().size(), 3UL + 2UL + 2UL);
EXPECT_EQ(grad_op->Input("In1"), std::vector<std::string>({"in1"}));
EXPECT_EQ(grad_op->Input("In2_mult"),
std::vector<std::string>({"in2_1", "in2_2", "in2_3"}));
EXPECT_EQ(grad_op->Input("In3"), std::vector<std::string>({"in3"}));
EXPECT_EQ(grad_op->Input("Out1"), std::vector<std::string>({"out1"}));
EXPECT_EQ(grad_op->Input("Out2_mult"),
std::vector<std::string>({"out2_1", "out2_2"}));
EXPECT_EQ(grad_op->Input(f::GradVarName("Out1")),
std::vector<std::string>({f::GradVarName("out1")}));
EXPECT_EQ(grad_op->Input(f::GradVarName("Out2_mult")),
std::vector<std::string>(
{f::GradVarName("out2_1"), f::GradVarName("out2_2")}));
ASSERT_EQ(grad_op->OutputNames().size(), 3UL);
EXPECT_EQ(grad_op->Output(f::GradVarName("In1")),
std::vector<std::string>({f::GradVarName("in1")}));
EXPECT_EQ(grad_op->Output(f::GradVarName("In2_mult")),
std::vector<std::string>({f::GradVarName("in2_1"),
f::GradVarName("in2_2"),
f::GradVarName("in2_3")}));
EXPECT_EQ(grad_op->Output(f::GradVarName("In3")),
std::vector<std::string>({f::GradVarName("in3")}));
delete forw_op;
delete grad_op;
}
TEST(GradOpDescBuilder, IOIgnoredInGradient) {
f::OpDescBind *forw_op = new f::OpDescBind();
forw_op->SetType("io_ignored");
forw_op->SetInput("In1", {"in1"});
forw_op->SetInput("In2_mult", {"in2_1", "in2_2"});
forw_op->SetInput("In3_mult", {"in3_1", "in3_2"});
forw_op->SetOutput("Out1_mult", {"out1_1", "out1_2"});
forw_op->SetOutput("Out2", {"out2"});
f::OpDescBind *grad_op = new f::OpDescBind();
f::CompleteGradOpDesc(forw_op, grad_op);
EXPECT_EQ(grad_op->Type(), "io_ignored_grad");
// 'In2' and 'Out2' are ignored in gradient calculating
ASSERT_EQ(grad_op->InputNames().size(), 2UL + 1UL + 2UL);
EXPECT_EQ(grad_op->Input("In1"), std::vector<std::string>({"in1"}));
EXPECT_EQ(grad_op->Input("In3_mult"),
std::vector<std::string>({"in3_1", "in3_2"}));
EXPECT_EQ(grad_op->Input("Out1_mult"),
std::vector<std::string>({"out1_1", "out1_2"}));
EXPECT_EQ(grad_op->Input(f::GradVarName("Out1_mult")),
std::vector<std::string>(
{f::GradVarName("out1_1"), f::GradVarName("out1_2")}));
EXPECT_EQ(grad_op->Input(f::GradVarName("Out2")),
std::vector<std::string>({f::GradVarName("out2")}));
ASSERT_EQ(grad_op->OutputNames().size(), 3UL);
EXPECT_EQ(grad_op->Output(f::GradVarName("In1")),
std::vector<std::string>({f::GradVarName("in1")}));
EXPECT_EQ(grad_op->Output(f::GradVarName("In2_mult")),
std::vector<std::string>(
{f::GradVarName("in2_1"), f::GradVarName("in2_2")}));
EXPECT_EQ(grad_op->Output(f::GradVarName("In3_mult")),
std::vector<std::string>(
{f::GradVarName("in3_1"), f::GradVarName("in3_2")}));
delete forw_op;
delete grad_op;
}
\ No newline at end of file
@@ -15,7 +15,7 @@
 #pragma once
 
 #include <memory>
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 #include <thrust/device_vector.h>
 #include <thrust/host_vector.h>
 #include <thrust/system/cuda/experimental/pinned_allocator.h>
@@ -29,7 +29,7 @@
 namespace paddle {
 namespace framework {
 
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
 template <typename T>
 using Vector = std::vector<T>;
 #else
......
@@ -18,6 +18,15 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
+OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs,
+                       const VariableNameMap &outputs,
+                       const AttributeMap &attrs) {
+  op_desc_.set_type(type);
+  inputs_ = inputs;
+  outputs_ = outputs;
+  attrs_ = attrs;
+}
+
 OpDesc *OpDescBind::Proto() {
   Sync();
   return &op_desc_;
@@ -31,6 +40,14 @@ const std::vector<std::string> &OpDescBind::Input(
   return it->second;
 }
 
+std::vector<std::string> OpDescBind::InputArgumentNames() const {
+  std::vector<std::string> retv;
+  for (auto &ipt : this->inputs_) {
+    retv.insert(retv.end(), ipt.second.begin(), ipt.second.end());
+  }
+  return retv;
+}
+
 void OpDescBind::SetInput(const std::string &param_name,
                           const std::vector<std::string> &args) {
   need_update_ = true;
@@ -45,6 +62,14 @@ const std::vector<std::string> &OpDescBind::Output(
   return it->second;
 }
 
+std::vector<std::string> OpDescBind::OutputArgumentNames() const {
+  std::vector<std::string> retv;
+  for (auto &ipt : this->outputs_) {
+    retv.insert(retv.end(), ipt.second.begin(), ipt.second.end());
+  }
+  return retv;
+}
+
 void OpDescBind::SetOutput(const std::string &param_name,
                            const std::vector<std::string> &args) {
   need_update_ = true;
@@ -94,6 +119,18 @@ const std::unordered_map<std::string, Attribute> &OpDescBind::GetAttrMap()
   return attrs_;
 }
 
+void OpDescBind::Rename(const std::string &old_name,
+                        const std::string &new_name) {
+  for (auto &input : inputs_) {
+    std::replace(input.second.begin(), input.second.end(), old_name, new_name);
+  }
+  for (auto &output : outputs_) {
+    std::replace(output.second.begin(), output.second.end(), old_name,
+                 new_name);
+  }
+  need_update_ = true;
+}
+
 struct SetAttrDescVisitor : public boost::static_visitor<void> {
   explicit SetAttrDescVisitor(OpDesc::Attr *attr) : attr_(attr) {}
   mutable OpDesc::Attr *attr_;
......
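The new OpDescBind::Rename substitutes every occurrence of a variable name across both argument maps; the backward builder relies on it to split a doubly-written gradient into the @RENAME@ slots seen in the tests above. A minimal sketch, assuming grad_op is a hypothetical OpDescBind* whose arguments mention out1@GRAD:

namespace f = paddle::framework;
// Redirect this op's copy of the gradient into a unique slot; a later
// "sum" op merges the slots back into f::GradVarName("out1").
grad_op->Rename(f::GradVarName("out1"), f::GradVarName("out1") + "@RENAME@0");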
@@ -27,6 +27,11 @@ class BlockDescBind;
 
 class OpDescBind {
  public:
+  OpDescBind() {}
+
+  OpDescBind(const std::string &type, const VariableNameMap &inputs,
+             const VariableNameMap &outputs, const AttributeMap &attrs);
+
   OpDesc *Proto();
 
   std::string Type() const { return op_desc_.type(); }
@@ -35,11 +40,15 @@ class OpDescBind {
 
   const std::vector<std::string> &Input(const std::string &name) const;
 
+  std::vector<std::string> InputArgumentNames() const;
+
   void SetInput(const std::string &param_name,
                 const std::vector<std::string> &args);
 
   const std::vector<std::string> &Output(const std::string &name) const;
 
+  std::vector<std::string> OutputArgumentNames() const;
+
   void SetOutput(const std::string &param_name,
                  const std::vector<std::string> &args);
@@ -61,6 +70,8 @@ class OpDescBind {
 
   int GetBlockAttr(const std::string &name) const;
 
+  void Rename(const std::string &old_name, const std::string &new_name);
+
   // Only be used in C++
   const AttributeMap &GetAttrMap() const;
@@ -70,6 +81,22 @@ class OpDescBind {
   std::vector<std::string> InputNames() const { return MapKeys(inputs_); }
   std::vector<std::string> OutputNames() const { return MapKeys(outputs_); }
 
+  void SetInputMap(const VariableNameMap &input) {
+    this->inputs_ = input;
+    this->need_update_ = true;
+  }
+
+  void SetOutputMap(const VariableNameMap &output) {
+    this->outputs_ = output;
+    this->need_update_ = true;
+  }
+
+  void Sync();
+
+  const VariableNameMap &Inputs() const { return inputs_; }
+
+  const VariableNameMap &Outputs() const { return outputs_; }
+
  private:
   template <typename MapType>
   static std::vector<typename MapType::key_type> MapKeys(const MapType &map) {
@@ -81,8 +108,6 @@ class OpDescBind {
     return ret_val;
   }
 
-  void Sync();
-
   OpDesc op_desc_;
   VariableNameMap inputs_;
   VariableNameMap outputs_;
......
@@ -17,6 +17,7 @@
 #include <map>
 #include <string>
 #include <unordered_map>
+
 #include "paddle/framework/attribute.h"
 #include "paddle/framework/op_desc.h"
 #include "paddle/framework/type_defs.h"
@@ -27,7 +28,6 @@ namespace framework {
 
 struct OpInfo {
   OpCreator creator_;
-  std::string grad_op_type_;
   GradOpMakerFN grad_op_maker_;
   OpProto* proto_{nullptr};
   OpAttrChecker* checker_{nullptr};
@@ -43,19 +43,19 @@ struct OpInfo {
     return *proto_;
   }
 
-  const OpAttrChecker& Checker() const {
-    PADDLE_ENFORCE_NOT_NULL(checker_,
-                            "Operator Checker has not been registered");
-    return *checker_;
-  }
-
   const OpCreator& Creator() const {
     PADDLE_ENFORCE_NOT_NULL(creator_,
                             "Operator Creator has not been registered");
     return creator_;
   }
 
-  bool HasGradientOp() const { return !grad_op_type_.empty(); }
+  const GradOpMakerFN& GradOpMaker() const {
+    PADDLE_ENFORCE_NOT_NULL(grad_op_maker_,
+                            "Operator GradOpMaker has not been registered.");
+    return grad_op_maker_;
+  }
+
+  const OpAttrChecker* Checker() const { return checker_; }
 };
 
 class OpInfoMap {
......
@@ -44,11 +44,6 @@ class OpProtoAndCheckerMaker {
       var_->set_intermediate(true);
       return *this;
     }
-
-    VariableBuilder& NotInGradient() {
-      var_->set_not_in_gradient(true);
-      return *this;
-    }
   };
 
   VariableBuilder AddInput(const std::string& name, const std::string& comment);
......
@@ -48,4 +48,4 @@ TEST(ProtoMaker, DuplicatedInOut) {
   paddle::framework::OpAttrChecker op_checker;
   auto proto_maker = TestInOutProtoMaker(&op_proto, &op_checker);
   ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet);
-}
\ No newline at end of file
+}
......
@@ -23,7 +23,9 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
     const std::string& type, const VariableNameMap& inputs,
     const VariableNameMap& outputs, AttributeMap attrs) {
   auto& info = OpInfoMap::Instance().Get(type);
-  info.Checker().Check(attrs);
+  if (info.Checker() != nullptr) {
+    info.Checker()->Check(attrs);
+  }
   auto op = info.Creator()(type, inputs, outputs, attrs);
   return std::unique_ptr<OperatorBase>(op);
 }
@@ -52,9 +54,15 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDesc& op_desc) {
   return CreateOp(op_desc.type(), inputs, outputs, attrs);
 }
 
-std::unique_ptr<OperatorBase> OpRegistry::CreateGradOp(const OperatorBase& op) {
-  PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops");
-  return std::unique_ptr<OperatorBase>(BuildGradOp(&op));
+std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDescBind& op_desc) {
+  return CreateOp(op_desc.Type(), op_desc.Inputs(), op_desc.Outputs(),
+                  op_desc.GetAttrMap());
+}
+
+std::vector<std::unique_ptr<OpDescBind>> OpRegistry::CreateGradOpDescs(
+    const OpDescBind& op_desc) {
+  auto& info = OpInfoMap::Instance().Get(op_desc.Type());
+  return info.grad_op_maker_(op_desc);
 }
 
 }  // namespace framework
......
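Together, CreateGradOpDescs and the OpDescBind overload of CreateOp replace the old BuildGradOp path: gradients are now derived at the description level and only then materialized. A sketch of the intended call sequence, assuming forward_desc is a hypothetical OpDescBind built as in the tests above:

namespace f = paddle::framework;
// Ask the registered GradOpMakerFN for the gradient op descriptions...
std::vector<std::unique_ptr<f::OpDescBind>> grad_descs =
    f::OpRegistry::CreateGradOpDescs(forward_desc);
// ...then turn each description into a runnable operator.
for (auto& grad_desc : grad_descs) {
  std::unique_ptr<f::OperatorBase> grad_op = f::OpRegistry::CreateOp(*grad_desc);
}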
@@ -23,25 +23,37 @@ limitations under the License. */
 #include "paddle/framework/attribute.h"
 #include "paddle/framework/details/op_registry.h"
 #include "paddle/framework/framework.pb.h"
-#include "paddle/framework/grad_op_builder.h"
+#include "paddle/framework/grad_op_desc_maker.h"
+#include "paddle/framework/op_desc.h"
 #include "paddle/framework/operator.h"
 #include "paddle/framework/scope.h"
 
 namespace paddle {
 namespace framework {
 
+class Registrar {
+ public:
+  // In our design, various kinds of classes, e.g., operators and kernels,
+  // have their corresponding registry and registrar. The action of
+  // registration is in the constructor of a global registrar variable, which,
+  // however, are not used in the code that calls package framework, and would
+  // be removed from the generated binary file by the linker. To avoid such
+  // removal, we add Touch to all registrar classes and make USE_OP macros to
+  // call this method. So, as long as the callee code calls USE_OP, the global
+  // registrar variable won't be removed by the linker.
+  void Touch() {}
+};
+
 template <typename... ARGS>
-struct OperatorRegistrar {
+struct OperatorRegistrar : public Registrar {
   explicit OperatorRegistrar(const char* op_type) : op_type(op_type) {
     PADDLE_ENFORCE(!OpInfoMap::Instance().Has(op_type),
                    "'%s' is registered more than once.", op_type);
     static_assert(sizeof...(ARGS) != 0,
                   "OperatorRegistrar should be invoked at least by OpClass");
     details::OperatorRegistrarRecursive<0, false, ARGS...>(op_type, &info);
+    OpInfoMap::Instance().Insert(op_type, info);
   }
-  ~OperatorRegistrar() { OpInfoMap::Instance().Insert(op_type, info); }
 
   const char* op_type;
   OpInfo info;
@@ -67,20 +79,10 @@ class OpRegistry {
 
   static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc);
 
-  static std::unique_ptr<OperatorBase> CreateGradOp(const OperatorBase& op);
-};
+  static std::vector<std::unique_ptr<OpDescBind>> CreateGradOpDescs(
+      const OpDescBind& op_desc);
 
-class Registrar {
- public:
-  // In our design, various kinds of classes, e.g., operators and kernels,
-  // have their corresponding registry and registrar. The action of
-  // registration is in the constructor of a global registrar variable, which,
-  // however, are not used in the code that calls package framework, and would
-  // be removed from the generated binary file by the linker. To avoid such
-  // removal, we add Touch to all registrar classes and make USE_OP macros to
-  // call this method. So, as long as the callee code calls USE_OP, the global
-  // registrar variable won't be removed by the linker.
-  void Touch() {}
+  static std::unique_ptr<OperatorBase> CreateOp(const OpDescBind& op_desc);
 };
 
 template <typename OpType, typename ProtoMakerType, typename GradOpType>
@@ -138,33 +140,41 @@ class OpKernelRegistrar : public Registrar {
                                  __test_global_namespace_##uniq_name##__>::value, \
       msg)
 
+#define REGISTER_OPERATOR(op_type, op_class, ...)                        \
+  STATIC_ASSERT_GLOBAL_NAMESPACE(                                        \
+      __reg_op__##op_type,                                               \
+      "REGISTER_OPERATOR must be called in global namespace");           \
+  class _OpClass_##op_type##_ : public op_class {                        \
+   public:                                                               \
+    DEFINE_OP_CLONE_METHOD(_OpClass_##op_type##_);                       \
+    DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_, op_class);              \
+  };                                                                     \
+  static ::paddle::framework::OperatorRegistrar<_OpClass_##op_type##_,   \
+                                                ##__VA_ARGS__>           \
+      __op_registrar_##op_type##__(#op_type);                            \
+  int TouchOpRegistrar_##op_type() {                                     \
+    __op_registrar_##op_type##__.Touch();                                \
+    return 0;                                                            \
+  }
+
 /**
  * Macro to register Operator.
  */
 #define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type,     \
                     grad_op_class)                                       \
-  STATIC_ASSERT_GLOBAL_NAMESPACE(                                        \
-      __reg_op__##op_type, "REGISTER_OP must be called in global namespace"); \
-  class _OpClass_##op_type##_ : public op_class {                        \
-   public:                                                               \
-    DEFINE_OP_CLONE_METHOD(_OpClass_##op_type##_);                       \
-    DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_, op_class);              \
-  };                                                                     \
-  class _OpGradClass_##op_type##_ : public grad_op_class {               \
-   public:                                                               \
-    DEFINE_OP_CLONE_METHOD(_OpGradClass_##op_type##_);                   \
-    DEFINE_OP_CONSTRUCTOR(_OpGradClass_##op_type##_, grad_op_class);     \
-  };                                                                     \
-  static ::paddle::framework::OpRegistrar<                               \
-      _OpClass_##op_type##_, op_maker_class, _OpGradClass_##op_type##_>  \
-      __op_registrar_##op_type##__(#op_type, #grad_op_type);             \
-  int TouchOpRegistrar_##op_type() {                                     \
-    __op_registrar_##op_type##__.Touch();                                \
-    return 0;                                                            \
-  }
+  REGISTER_OPERATOR(grad_op_type, grad_op_class);                        \
+  class _GradOpDescMaker_##grad_op_type##_                                \
+      : public ::paddle::framework::DefaultGradOpDescMaker {              \
+    using ::paddle::framework::DefaultGradOpDescMaker::DefaultGradOpDescMaker; \
+                                                                          \
+   protected:                                                             \
+    virtual std::string GradOpType() const { return #grad_op_type; }      \
+  };                                                                      \
+  REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_, \
+                    op_maker_class);
 
 #define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) \
-  REGISTER_OP(op_type, op_class, op_maker_class, , ::paddle::framework::NOP)
+  REGISTER_OPERATOR(op_type, op_class, op_maker_class)
 
 /**
  * Macro to register OperatorKernel.
@@ -211,7 +221,7 @@ class OpKernelRegistrar : public Registrar {
 
 // TODO(fengjiayi): The following macros
 // seems ugly, do we have better method?
 
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
 #define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU)
 #else
 #define USE_OP_KERNEL(op_type) \
......
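The Registrar comment above describes a classic static-initialization pitfall: registration runs in the constructor of a file-scope registrar object, and the linker may drop the object file that defines it if nothing references it. The generated TouchOpRegistrar_##op_type function gives callers something to reference. A sketch of the consuming side, with a hypothetical MY_USE_OP macro standing in for the real USE_OP family:

#define MY_USE_OP(op_type)                                 \
  extern int TouchOpRegistrar_##op_type();                 \
  static int __use_op_##op_type##__ = TouchOpRegistrar_##op_type()

// Writing MY_USE_OP(sum); in any translation unit forces the linker to keep
// the object file holding __op_registrar_sum__, so the op is registered
// before main() runs.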
@@ -183,4 +183,4 @@ class CosineOpComplete : public paddle::framework::CosineOp {
 
 TEST(OperatorRegistrar, Test) {
   using namespace paddle::framework;
   OperatorRegistrar<CosineOpComplete, CosineOpProtoAndCheckerMaker> reg("cos");
-}
\ No newline at end of file
+}
......
@@ -25,7 +25,7 @@ Eigen::DefaultDevice& ExecutionContext::GetEigenDevice<
   return *device_context_.GetEigenDevice<platform::CPUPlace>();
 }
 
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 template <>
 Eigen::GpuDevice&
 ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
@@ -205,13 +205,13 @@ void OperatorBase::GenerateTemporaryNames() {
 }
 
 template <>
-const Tensor* InferShapeContext::Input<Tensor>(const std::string& name) const {
+const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const {
   auto* var = InputVar(name);
   return var == nullptr ? nullptr : GetTensorFromVar(var);
 }
 
 template <>
-const std::vector<const Tensor*> InferShapeContext::MultiInput<Tensor>(
+const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
     const std::string& name) const {
   auto names = op().Inputs(name);
   std::vector<const Tensor*> res;
@@ -225,13 +225,13 @@ const std::vector<const Tensor*> InferShapeContext::MultiInput<Tensor>(
 }
 
 template <>
-Tensor* InferShapeContext::Output<Tensor>(const std::string& name) const {
+Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const {
   auto var = OutputVar(name);
   return var == nullptr ? nullptr : var->GetMutable<LoDTensor>();
 }
 
 template <>
-std::vector<Tensor*> InferShapeContext::MultiOutput<Tensor>(
+std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
     const std::string& name) const {
   auto names = op().Outputs(name);
   std::vector<Tensor*> res;
......
@@ -22,6 +22,7 @@ limitations under the License. */
 
 #include "op_info.h"
 #include "paddle/framework/attribute.h"
+#include "paddle/framework/block_desc.h"
 #include "paddle/framework/data_type.h"
 #include "paddle/framework/framework.pb.h"
 #include "paddle/framework/lod_tensor.h"
@@ -56,7 +57,6 @@ inline std::string GradVarName(const std::string& var_name) {
 }
 
 class OperatorBase;
-class InferShapeContext;
 class ExecutionContext;
 
 extern const Tensor* GetTensorFromVar(const Variable* var);
@@ -168,10 +168,11 @@ class NOP : public OperatorBase {
   }
 };
 
-class InferShapeContext {
+class ExecutionContext {
  public:
-  InferShapeContext(const OperatorBase& op, const Scope& scope)
-      : op_(op), scope_(scope) {}
+  ExecutionContext(const OperatorBase& op, const Scope& scope,
+                   const platform::DeviceContext& device_context)
+      : op_(op), scope_(scope), device_context_(device_context) {}
 
   const OperatorBase& op() const { return op_; }
 
@@ -277,66 +278,153 @@ class InferShapeContext {
     out_tensor->set_lod(in_tensor.lod());
   }
 
+  template <typename PlaceType,
+            typename DeviceType = typename platform::EigenDeviceConverter<
+                PlaceType>::EigenDeviceType>
+  DeviceType& GetEigenDevice() const;
+
+  platform::Place GetPlace() const { return device_context_.GetPlace(); }
+
+  const platform::DeviceContext& device_context() const {
+    return device_context_;
+  }
+
  private:
   const OperatorBase& op_;
   const Scope& scope_;
+  const platform::DeviceContext& device_context_;
 };
 
 template <>
-const Tensor* InferShapeContext::Input<Tensor>(const std::string& name) const;
+const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const;
 
 template <>
-const std::vector<const Tensor*> InferShapeContext::MultiInput<Tensor>(
+const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
     const std::string& name) const;
 
 template <>
-Tensor* InferShapeContext::Output<Tensor>(const std::string& name) const;
+Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const;
 
 template <>
-std::vector<Tensor*> InferShapeContext::MultiOutput<Tensor>(
+std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
     const std::string& name) const;
 
-class ExecutionContext : public InferShapeContext {
+class CompileTimeInferShapeContext : public InferShapeContext {
  public:
-  ExecutionContext(const OperatorBase& op, const Scope& scope,
-                   const platform::DeviceContext& device_context)
-      : InferShapeContext(op, scope), device_context_(device_context) {}
+  CompileTimeInferShapeContext(const OpDescBind& op, const BlockDescBind& block)
+      : op_(op), block_(block) {}
 
+  bool HasInput(const std::string& name) const override {
+    const std::vector<std::string>& input_names = op_.Input(name);
+    auto length = input_names.size();
+    PADDLE_ENFORCE_EQ(length, 1UL,
+                      "Input(%s) should have only one value, "
+                      "but it have %d now",
+                      name, length);
+    return block_.HasVar(input_names[0]);
+  }
+
+  bool HasOutput(const std::string& name) const override {
+    const std::vector<std::string>& output_names = op_.Output(name);
+    auto length = output_names.size();
+    PADDLE_ENFORCE_EQ(length, 1UL,
+                      "Output(%s) should have only one value, "
+                      "but it have %d now",
+                      name, length);
+    return block_.HasVar(output_names[0]);
+  }
+
+  bool HasInputs(const std::string& name) const override {
+    const std::vector<std::string>& input_names = op_.Input(name);
+    PADDLE_ENFORCE(!input_names.empty(), "Inputs(%s) length is 0", name);
+    for (auto& input : input_names) {
+      if (!block_.HasVar(input)) return false;
+    }
+    return true;
+  }
 
-  template <typename PlaceType,
-            typename DeviceType = typename platform::EigenDeviceConverter<
-                PlaceType>::EigenDeviceType>
-  DeviceType& GetEigenDevice() const;
+  bool HasOutputs(const std::string& name) const override {
+    const std::vector<std::string>& output_names = op_.Output(name);
+    PADDLE_ENFORCE(!output_names.empty(), "Inputs(%s) length is 0", name);
+    for (auto& output : output_names) {
+      if (!block_.HasVar(output)) return false;
+    }
+    return true;
+  }
 
-  platform::Place GetPlace() const { return device_context_.GetPlace(); }
+  DDim GetInputDim(const std::string& name) const override {
+    std::vector<DDim> ddims = GetInputsDim(name);
+    auto length = ddims.size();
+    PADDLE_ENFORCE_EQ(length, 1UL,
+                      "Input(%s) should have 1 value, "
+                      "but it has %d now",
+                      name, length);
+    return ddims[0];
+  }
 
-  const platform::DeviceContext& device_context() const {
-    return device_context_;
+  void SetInputDim(const std::string& name, const DDim& dim) override {
+    SetInputsDim(name, {dim});
   }
 
+  DDim GetOutputDim(const std::string& name) const override {
+    std::vector<DDim> ddims = GetOutputsDim(name);
+    auto length = ddims.size();
+    PADDLE_ENFORCE_EQ(length, 1UL,
+                      "Output(%s) should have 1 value, "
+                      "but it has %d now",
+                      name, length);
+    return ddims[0];
+  }
+
+  void SetOutputDim(const std::string& name, const DDim& dim) override {
+    SetOutputsDim(name, {dim});
+  }
+
+  AttrReader Attrs() const override { return AttrReader(op_.GetAttrMap()); }
+
+  const std::vector<std::string>& Inputs(
+      const std::string& name) const override {
+    return op_.Input(name);
+  }
+
+  const std::vector<std::string>& Outputs(
+      const std::string& name) const override {
+    return op_.Output(name);
+  }
+
  private:
-  const platform::DeviceContext& device_context_;
+  DDim GetDim(const std::string& name) const override {
+    return framework::make_ddim(block_.Var(name)->Shape());
+  }
+
+  void SetDim(const std::string& name, const DDim& dim) override {
+    block_.Var(name)->SetShape(framework::vectorize(dim));
+  }
+
+  const OpDescBind& op_;
+  const BlockDescBind& block_;
 };
 
-class RuntimeInferShapeContext : public InferShapeContextBase {
+class RuntimeInferShapeContext : public InferShapeContext {
  public:
   RuntimeInferShapeContext(const OperatorBase& op, const Scope& scope)
       : op_(op), scope_(scope) {}
 
-  bool HasInput(const std::string& name) const {
+  bool HasInput(const std::string& name) const override {
     auto ipt = op_.Input(name);
     auto* var = ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt);
     return var != nullptr;
   }
 
-  bool HasOutput(const std::string& name) const {
+  bool HasOutput(const std::string& name) const override {
     auto ipt = op_.Output(name);
     auto* var = ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt);
     return var != nullptr;
   }
 
-  bool HasInputs(const std::string& name) const {
+  bool HasInputs(const std::string& name) const override {
     auto inputs = op_.Inputs(name);
-    if (inputs.size() == 0UL) {
+    if (inputs.empty()) {
       return false;
     }
     for (auto& input : inputs) {
@@ -347,9 +435,9 @@ class RuntimeInferShapeContext : public InferShapeContextBase {
     return true;
   }
 
-  bool HasOutputs(const std::string& name) const {
+  bool HasOutputs(const std::string& name) const override {
     auto outputs = op_.Outputs(name);
-    if (outputs.size() == 0UL) {
+    if (outputs.empty()) {
       return false;
     }
     for (auto& output : outputs) {
@@ -360,29 +448,31 @@ class RuntimeInferShapeContext : public InferShapeContextBase {
     return true;
   }
 
-  DDim GetInputDim(const std::string& name) const {
+  DDim GetInputDim(const std::string& name) const override {
    return GetDim(op_.Input(name));
   }
 
-  void SetInputDim(const std::string& name, const DDim& dim) {
+  void SetInputDim(const std::string& name, const DDim& dim) override {
    SetDim(op_.Input(name), dim);
   }
 
-  DDim GetOutputDim(const std::string& name) const {
+  DDim GetOutputDim(const std::string& name) const override {
    return GetDim(op_.Output(name));
   }
 
-  void SetOutputDim(const std::string& name, const DDim& dim) {
+  void SetOutputDim(const std::string& name, const DDim& dim) override {
    SetDim(op_.Output(name), dim);
   }
 
-  AttrReader Attrs() const { return AttrReader(op_.Attrs()); }
+  AttrReader Attrs() const override { return AttrReader(op_.Attrs()); }
 
-  const std::vector<std::string>& Inputs(const std::string& name) const {
+  const std::vector<std::string>& Inputs(
+      const std::string& name) const override {
     return op_.Inputs(name);
   }
 
-  const std::vector<std::string>& Outputs(const std::string& name) const {
+  const std::vector<std::string>& Outputs(
+      const std::string& name) const override {
     return op_.Outputs(name);
   }
 
@@ -403,11 +493,11 @@ class RuntimeInferShapeContext : public InferShapeContextBase {
     return t;
   }
 
-  DDim GetDim(const std::string& name) const {
+  DDim GetDim(const std::string& name) const override {
     return GetTensor<false>(name)->dims();
   }
 
-  void SetDim(const std::string& name, const DDim& dim) {
+  void SetDim(const std::string& name, const DDim& dim) override {
     GetTensor<true>(name)->Resize(dim);
   }
 
@@ -513,9 +603,9 @@ class OperatorWithKernel : public OperatorBase {
     });
   }
 
- protected:
-  virtual void InferShape(InferShapeContextBase* ctx) const = 0;
+  virtual void InferShape(InferShapeContext* ctx) const = 0;
 
+ protected:
   // indicate kernel DataType by input data. Defaultly all input data must be
   // same.
   virtual DataType IndicateDataType(const ExecutionContext& ctx) const {
......
@@ -113,7 +113,7 @@ class OpWithKernelTest : public OperatorWithKernel {
   using OperatorWithKernel::OperatorWithKernel;
 
  protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {}
+  void InferShape(framework::InferShapeContext* ctx) const override {}
   DataType IndicateDataType(const ExecutionContext& ctx) const override {
     return DataType::FP32;
   }
......
@@ -19,9 +19,12 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
-class InferShapeContextBase {
+// TODO(longfei): Once after both CompileTimeInferShapeContext and
+// RuntimeInferShapeContext get merged, we can rename InferShapeContext into
+// InferShapeContext so to replace the current InferShapeContext.
+class InferShapeContext {
  public:
-  virtual ~InferShapeContextBase() {}
+  virtual ~InferShapeContext() {}
   virtual bool HasInput(const std::string &name) const = 0;
   virtual bool HasOutput(const std::string &name) const = 0;
......
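Because CompileTimeInferShapeContext (backed by an OpDescBind and a BlockDescBind) and RuntimeInferShapeContext (backed by a Scope) now share this interface, a single InferShape body can validate and propagate shapes both while a program is being built and while it runs. A hedged sketch of an operator hook written against the shared interface; MyCloneOp is hypothetical, the accessors are the ones declared above:

namespace f = paddle::framework;

class MyCloneOp : public f::OperatorWithKernel {
 public:
  using f::OperatorWithKernel::OperatorWithKernel;

  void InferShape(f::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of MyCloneOp must be set.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) must be set.");
    // The same call resolves against a BlockDescBind at compile time and
    // against LoDTensors in a Scope at run time.
    ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
  }
};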
@@ -217,12 +217,11 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) {
 
   // collect indice need to copy to the batch
   std::vector<size_t> indice;
-  for (size_t seq_id = 0; seq_id < meta.size(); seq_id++) {
-    const auto& seq_meta = meta[seq_id];
-    if (index >= seq_meta.end) break;
-    indice.push_back(seq_meta.begin + index);
+  for (const auto& seq : meta) {
+    size_t id = seq.begin + index;
+    if (id >= seq.end) break;
+    indice.push_back(id);
   }
-
   PADDLE_ENFORCE(!indice.empty(), "invalid batch at %d", index);
 
   // copy the indice of records in LoDTensor
@@ -232,16 +231,18 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) {
   result.Resize(make_ddim(record_dims_vec));
   result.mutable_data<value_type>(platform::CPUPlace());
 
-  for (size_t i = 0; i < indice.size() - 1; i++) {
+  for (size_t i = 0; i < indice.size(); i++) {
     auto index = indice[i];
     auto target = result.Slice<value_type>(i, i + 1);
     auto source_ = source->Slice<value_type>(index, index + 1);
 
     target.CopyFrom<value_type>(source_, platform::CPUPlace());
   }
 
   return result;
 }
 
+// TODO(supejom) to cache lod if reasonable
 LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source,
                            const std::vector<DySeqMeta>& meta, const LoD& lod,
                            size_t level) {
@@ -273,7 +274,6 @@ LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source,
   }
 
   result.set_lod(lod);
-
   return result;
 }
......
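The rewritten GetBatch loop gathers, for a given time step index, one record from every sequence that still has an element at that step, and the copy loop's off-by-one (indice.size() - 1) is fixed so the last gathered record is copied as well. In comment form, under the apparent assumption that meta is sorted by decreasing sequence length so the early break is safe:

// meta = {[0,5), [5,7)}  -- begin/end offsets of two sequences
// GetBatch(1) collects source rows {0+1, 5+1} = {1, 6}; both ids are
// below their sequences' end offsets, so both rows land in the batch.
// GetBatch(3) collects {3} only: 5+3 >= 7, so the loop breaks there.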
@@ -26,6 +26,9 @@ namespace framework {
  * in original lod-tensor.
  */
 struct DySeqMeta {
+  DySeqMeta(size_t begin, size_t end, size_t ori_idx)
+      : begin(begin), end(end), ori_idx(ori_idx) {}
+
   size_t begin;
   size_t end;  // not included
   size_t ori_idx;
......
@@ -65,7 +65,7 @@ inline T* Tensor::mutable_data(platform::Place place) {
     holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
         boost::get<platform::CPUPlace>(place), size));
   } else if (platform::is_gpu_place(place)) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
     PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
   }
 #else
@@ -103,7 +103,7 @@ inline void Tensor::CopyFrom(const Tensor& src,
     memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
                  boost::get<platform::CPUPlace>(src_place), src_ptr, size);
   }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   else if (platform::is_gpu_place(src_place) &&
            platform::is_cpu_place(dst_place)) {
     memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
......
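The mechanical PADDLE_WITH_GPU -> PADDLE_WITH_CUDA rename that dominates the rest of this commit matches the add_definitions(-DPADDLE_WITH_CUDA) change in the top-level CMakeLists. The guard idiom itself is unchanged; a self-contained sketch of the pattern (illustration only, not Paddle's memory::Copy):

#include <cstddef>
#include <cstring>
#include <stdexcept>
#ifdef PADDLE_WITH_CUDA
#include <cuda_runtime.h>
#endif

// CPU-only builds must fail loudly when GPU work is requested; CUDA builds
// compile the device path behind the same macro.
void CopyToDevice(void* dst, const void* src, size_t n, bool on_gpu) {
#ifndef PADDLE_WITH_CUDA
  if (on_gpu) throw std::runtime_error("compiled without CUDA support");
  std::memcpy(dst, src, n);
#else
  if (on_gpu) {
    cudaMemcpy(dst, src, n, cudaMemcpyHostToDevice);
  } else {
    std::memcpy(dst, src, n);
  }
#endif
}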
@@ -74,7 +74,7 @@ TEST(Tensor, MutableData) {
     EXPECT_EQ(p1, p2);
   }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   {
     Tensor src_tensor;
     float* p1 = nullptr;
@@ -126,7 +126,7 @@ TEST(Tensor, ShareDataWith) {
     ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>());
   }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   {
     Tensor src_tensor;
     Tensor dst_tensor;
@@ -163,7 +163,7 @@ TEST(Tensor, Slice) {
   EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address);
 }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   {
     Tensor src_tensor;
     src_tensor.mutable_data<double>(make_ddim({6, 9}), GPUPlace());
@@ -218,7 +218,7 @@ TEST(Tensor, CopyFrom) {
       EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
     }
   }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   {
     Tensor src_tensor;
     Tensor gpu_tensor;
...
@@ -194,7 +194,7 @@ public:
 REGISTER_TYPED_FUNC(BlockExpand, CPU, BlockExpandForward);
 REGISTER_TYPED_FUNC(BlockExpandGrad, CPU, BlockExpandBackward);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 REGISTER_TYPED_FUNC(BlockExpand, GPU, BlockExpandForward);
 REGISTER_TYPED_FUNC(BlockExpandGrad, GPU, BlockExpandBackward);
 #endif
...
@@ -395,7 +395,7 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
 REGISTER_TYPED_FUNC(ContextProjectionBackward,
                     CPU,
                     ContextProjectionBackwardFunc);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 REGISTER_TYPED_FUNC(ContextProjectionForward,
                     GPU,
                     ContextProjectionForwardFunc);
...
@@ -233,7 +233,7 @@ private:
 REGISTER_TYPED_FUNC(CosSimForward, CPU, CosSimForwardFunc);
 REGISTER_TYPED_FUNC(CosSimBackward, CPU, CosSimBackwardFunc);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 REGISTER_TYPED_FUNC(CosSimForward, GPU, CosSimForwardFunc);
 REGISTER_TYPED_FUNC(CosSimBackward, GPU, CosSimBackwardFunc);
 #endif
...
@@ -169,7 +169,7 @@ private:
 REGISTER_TYPED_FUNC(Crop, CPU, CropFunc);
 REGISTER_TYPED_FUNC(CropGrad, CPU, CropGradFunc);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 REGISTER_TYPED_FUNC(Crop, GPU, CropFunc);
 REGISTER_TYPED_FUNC(CropGrad, GPU, CropGradFunc);
 #endif
...
@@ -336,7 +336,7 @@ private:
 REGISTER_TYPED_FUNC(CrossMapNormal, CPU, CrossMapNormalFunc);
 REGISTER_TYPED_FUNC(CrossMapNormalGrad, CPU, CrossMapNormalGradFunc);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 REGISTER_TYPED_FUNC(CrossMapNormal, GPU, CrossMapNormalFunc);
 REGISTER_TYPED_FUNC(CrossMapNormalGrad, GPU, CrossMapNormalGradFunc);
 #endif
...
@@ -292,7 +292,7 @@ REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
 REGISTER_TYPED_FUNC(DepthwiseConvGradFilter,
                     CPU,
                     DepthwiseConvGradFilterFunction);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 REGISTER_TYPED_FUNC(DepthwiseConv, GPU, DepthwiseConvFunction);
 REGISTER_TYPED_FUNC(DepthwiseConvGradInput,
                     GPU,
...
@@ -17,7 +17,7 @@ limitations under the License. */
 namespace paddle {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 TEST(DepthwiseConv, Forward) {
   DepthwiseConvolution<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
       "GemmConv-CPU", "DepthwiseConv-GPU", forward);
...
@@ -340,7 +340,7 @@ public:
 REGISTER_TYPED_FUNC(GemmConv, CPU, GemmConvFunction);
 REGISTER_TYPED_FUNC(GemmConvGradInput, CPU, GemmConvGradInputFunction);
 REGISTER_TYPED_FUNC(GemmConvGradFilter, CPU, GemmConvGradFilterFunction);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 REGISTER_TYPED_FUNC(GemmConv, GPU, GemmConvFunction);
 REGISTER_TYPED_FUNC(GemmConvGradInput, GPU, GemmConvGradInputFunction);
 REGISTER_TYPED_FUNC(GemmConvGradFilter, GPU, GemmConvGradFilterFunction);
...
@@ -24,7 +24,7 @@ TEST(GemmConv, NaiveConv) {
       "NaiveConv-CPU", "GemmConv-CPU", forward);
 }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 TEST(GemmConv, Forward) {
   Convolution<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU>(
       "GemmConv-CPU", "GemmConv-GPU", forward);
...
@@ -116,7 +116,7 @@ void TestIm2ColFunctor() {
 TEST(Im2ColFunctor, CPU) { TestIm2ColFunctor<DEVICE_TYPE_CPU, float>(); }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 TEST(Im2ColFunctor, GPU) { TestIm2ColFunctor<DEVICE_TYPE_GPU, float>(); }
...
@@ -341,7 +341,7 @@ private:
 };
 REGISTER_TYPED_FUNC(MulOp, CPU, MulFunc);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 REGISTER_TYPED_FUNC(MulOp, GPU, MulFunc);
 #endif
 }  // namespace paddle
@@ -207,7 +207,7 @@ private:
 REGISTER_TYPED_FUNC(Pad, CPU, PadFunc);
 REGISTER_TYPED_FUNC(PadGrad, CPU, PadGradFunc);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 REGISTER_TYPED_FUNC(Pad, GPU, PadFunc);
 REGISTER_TYPED_FUNC(PadGrad, GPU, PadGradFunc);
 #endif
...
@@ -217,7 +217,7 @@ public:
 REGISTER_TYPED_FUNC(RowConv, CPU, RowConvFunc);
 REGISTER_TYPED_FUNC(RowConvGrad, CPU, RowConvGradFunc);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 REGISTER_TYPED_FUNC(RowConv, GPU, RowConvFunc);
 REGISTER_TYPED_FUNC(RowConvGrad, GPU, RowConvGradFunc);
 #endif
...
@@ -132,7 +132,7 @@ public:
 REGISTER_TYPED_FUNC(NCHW2NHWC, CPU, NCHW2NHWCFunc);
 REGISTER_TYPED_FUNC(NHWC2NCHW, CPU, NHWC2NCHWFunc);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 REGISTER_TYPED_FUNC(NCHW2NHWC, GPU, NCHW2NHWCFunc);
 REGISTER_TYPED_FUNC(NHWC2NCHW, GPU, NHWC2NCHWFunc);
 #endif
...
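All of the function hunks above follow one registration idiom: CPU kernels are registered unconditionally, GPU kernels only when PADDLE_WITH_CUDA is defined, so CPU-only builds never reference CUDA symbols. A simplified sketch of that idiom (toy registry; the real REGISTER_TYPED_FUNC machinery is more involved):

#include <functional>
#include <map>
#include <string>

using Kernel = std::function<void()>;
static std::map<std::string, Kernel> g_kernels;

// Tiny stand-in for REGISTER_TYPED_FUNC: file-scope registration keyed
// by "<op>-<device>".
#define REGISTER_KERNEL(op, device, fn) \
  static bool op##device##_registered = \
      (g_kernels[#op "-" #device] = fn, true)

void MulCpu() { /* CPU implementation */ }
REGISTER_KERNEL(MulOp, CPU, MulCpu);
#ifdef PADDLE_WITH_CUDA
void MulGpu() { /* CUDA launch */ }
REGISTER_KERNEL(MulOp, GPU, MulGpu);  // exists only in CUDA builds
#endif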
@@ -60,6 +60,36 @@ if(NOT WITH_PYTHON)
         dataproviders/PyDataProvider.h)
 endif()
+if(MOBILE_INFERENCE)
+  # Remove evaluators
+  list(REMOVE_ITEM GSERVER_SOURCES
+       layers/ValidationLayer.cpp
+       evaluators/Evaluator.cpp
+       evaluators/DetectionMAPEvaluator.cpp
+       evaluators/CTCErrorEvaluator.cpp
+       evaluators/ChunkEvaluator.cpp)
+  # Remove dataproviders
+  list(REMOVE_ITEM GSERVER_SOURCES
+       dataproviders/DataProvider.cpp
+       dataproviders/MultiDataProvider.cpp
+       dataproviders/ProtoDataProvider.cpp
+       dataproviders/PyDataProvider2.cpp
+       dataproviders/PyDataProvider.cpp)
+  # Remove useless gradientmachines
+  list(REMOVE_ITEM GSERVER_SOURCES
+       gradientmachines/MultiNetwork.cpp
+       gradientmachines/RecurrentGradientMachine.cpp
+       gradientmachines/ParallelNeuralNetwork.cpp
+       gradientmachines/GradientMachineMode.cpp
+       gradientmachines/MultiGradientMachine.cpp)
+  # Remove useless layers
+  list(REMOVE_ITEM GSERVER_SOURCES
+       layers/RecurrentLayerGroup.cpp)
+endif()
 if(WITH_GPU)
   cuda_add_library(paddle_gserver ${GSERVER_SOURCES})
 else()
...
@@ -17,12 +17,15 @@ limitations under the License. */
 #include <fstream>
 #include "paddle/utils/Logging.h"
+#include "NeuralNetwork.h"
+#include "hl_gpu.h"
+#ifndef PADDLE_MOBILE_INFERENCE
 #include "GradientMachineMode.h"
 #include "MultiGradientMachine.h"
 #include "MultiNetwork.h"
-#include "NeuralNetwork.h"
 #include "ParallelNeuralNetwork.h"
-#include "hl_gpu.h"
+#endif
 namespace paddle {
@@ -30,13 +33,16 @@ GradientMachine* GradientMachine::create(
     const ModelConfig& config,
     int mode,
     const std::vector<ParameterType>& parameterTypes) {
+#ifndef PADDLE_MOBILE_INFERENCE
   if (auto gm = IGradientMachineMode::tryCreateGradientMachine(mode, config)) {
     return gm;
   }
   if (FLAGS_trainer_count > 1) {
     return new MultiGradientMachine(config, FLAGS_use_gpu);
   }
+#endif
   if (FLAGS_trainer_count == 1) {  // single
+#ifndef PADDLE_MOBILE_INFERENCE
     NeuralNetwork* nn;
     if (config.type() == "multi_nn") {
       /* multi submodel calculate, thread(s) will be initialized inside */
@@ -48,6 +54,9 @@ GradientMachine* GradientMachine::create(
       /* single thread calculate */
       nn = NeuralNetwork::create(config);
     }
+#else
+    NeuralNetwork* nn = NeuralNetwork::create(config);
+#endif
     ParamInitCallback testParamInitCb = [](int paramId, Parameter* para) {
       para->enableType(PARAMETER_VALUE);
     };
...
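Under PADDLE_MOBILE_INFERENCE the factory above keeps only the single-trainer branch. A toy model of the same guard layout, runnable on its own (a sketch of the structure, not the real class hierarchy):

#include <cstdio>

// The full build keeps mode dispatch and MultiGradientMachine; the mobile
// inference build compiles only the plain single-network path.
const char* MachineKind(int trainer_count) {
#ifndef PADDLE_MOBILE_INFERENCE
  if (trainer_count > 1) return "MultiGradientMachine";
#endif
  if (trainer_count == 1) {
#ifndef PADDLE_MOBILE_INFERENCE
    return "NeuralNetwork (multi_nn / recurrent_nn dispatch)";
#else
    return "NeuralNetwork (plain)";
#endif
  }
  return "unsupported";
}

int main() { std::puts(MachineKind(1)); }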
@@ -20,13 +20,16 @@ limitations under the License. */
 #include "ModelConfig.pb.h"
 #include "TrainerConfig.pb.h"
 #include "paddle/gserver/dataproviders/DataProvider.h"
-#include "paddle/gserver/evaluators/Evaluator.h"
 #include "paddle/gserver/layers/Layer.h"
 #include "paddle/math/Matrix.h"
 #include "paddle/parameter/Parameter.h"
 #include "paddle/parameter/ParameterUpdaterBase.h"
 #include "paddle/utils/Thread.h"
+#ifndef PADDLE_MOBILE_INFERENCE
+#include "paddle/gserver/evaluators/Evaluator.h"
+#endif
 namespace paddle {
 /**
  * @brief A gradient machine is capable of calculating some outputs given
@@ -147,6 +150,7 @@ public:
   virtual void onPassEnd() = 0;
+#ifndef PADDLE_MOBILE_INFERENCE
   /**
    * Create an evaluator which can be used for eval()
    */
@@ -156,6 +160,7 @@ public:
    * evaluate using the given evaluator
    */
   virtual void eval(Evaluator* evaluator) const = 0;
+#endif
   std::vector<ParameterPtr>& getParameters() { return parameters_; }
...
@@ -14,15 +14,17 @@ limitations under the License. */
 #include "paddle/utils/Util.h"
+#include "NeuralNetwork.h"
+#include "hl_gpu.h"
+#include "paddle/gserver/layers/AgentLayer.h"
 #include "paddle/utils/CustomStackTrace.h"
 #include "paddle/utils/Logging.h"
+#include "paddle/utils/Stat.h"
+#ifndef PADDLE_MOBILE_INFERENCE
 #include "MultiNetwork.h"
-#include "NeuralNetwork.h"
 #include "RecurrentGradientMachine.h"
-#include "hl_gpu.h"
+#endif
-#include "paddle/gserver/layers/AgentLayer.h"
-#include "paddle/utils/Stat.h"
 namespace paddle {
 void parameterInitNN(int paramId,
@@ -54,6 +56,7 @@ void parameterInitNN(int paramId,
 }
 NeuralNetwork* NeuralNetwork::create(const ModelConfig& config) {
+#ifndef PADDLE_MOBILE_INFERENCE
   if (config.type() == "recurrent_nn") {
     return newNeuralNetwork("root");
   } else if (config.type() == "multi_nn") {
@@ -61,6 +64,9 @@ NeuralNetwork* NeuralNetwork::create(const ModelConfig& config) {
   } else {
     return newNeuralNetwork();
   }
+#else
+  return new NeuralNetwork();
+#endif
 }
 std::map<std::string, bool> NeuralNetwork::dllInitMap;
@@ -304,6 +310,8 @@ void NeuralNetwork::onPassEnd() {
   }
 }
+#ifndef PADDLE_MOBILE_INFERENCE
 class CombinedEvaluator : public Evaluator {
 public:
   void addEvaluator(std::unique_ptr<Evaluator>&& evaluator) {
@@ -466,6 +474,8 @@ Evaluator* NeuralNetwork::makeEvaluator() const {
 void NeuralNetwork::eval(Evaluator* evaluator) const { evaluator->eval(*this); }
+#endif
 void NeuralNetwork::setOutputGrad(const std::vector<Argument>& args) {
   CHECK_GE(outputLayers_.size(), args.size());
   for (size_t i = 0; i < args.size(); ++i) {
...
@@ -97,9 +97,12 @@ public:
   virtual void onPassEnd();
+#ifndef PADDLE_MOBILE_INFERENCE
   virtual Evaluator* makeEvaluator() const;
   virtual void eval(Evaluator* evaluator) const;
+#endif
   virtual void resetState();
   virtual void setOutputGrad(const std::vector<Argument>& args);
...
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "BatchNormalizationLayer.h"
 #include "Layer.h"
 #include "paddle/utils/Stat.h"
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 #include "CudnnBatchNormLayer.h"
 #endif
...
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/utils/Stat.h"
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 #include "hl_batch_transpose.h"
 #endif
 #include "BatchNormalizationLayer.h"
@@ -90,7 +90,7 @@ void BatchNormalizationLayer::expandMat(const MatrixPtr& in, MatrixPtr& out) {
   size_t batchSize = in->getHeight();
   CHECK_EQ(out->getHeight(), batchSize * imgPixels_);
   if (useGpu_) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
     LOG(FATAL) << "paddle is compiled only for cpu";
 #else
     batchTranspose(
@@ -127,7 +127,7 @@ void BatchNormalizationLayer::shrinkMat(const MatrixPtr& in, MatrixPtr& out) {
   }
   CHECK_EQ(in->getHeight(), static_cast<size_t>(batchSize * imgPixels_));
   if (useGpu_) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
     LOG(FATAL) << "paddle is compiled only for cpu";
 #else
     batchTranspose(
...
@@ -15,11 +15,14 @@ limitations under the License. */
 #include "paddle/utils/Util.h"
 #include "CostLayer.h"
-#include "ValidationLayer.h"
 #include "paddle/math/SparseMatrix.h"
 #include "paddle/utils/Error.h"
 #include "paddle/utils/Logging.h"
+#ifndef PADDLE_MOBILE_INFERENCE
+#include "ValidationLayer.h"
+#endif
 DEFINE_bool(log_error_clipping, false, "enable log error clipping or not");
 namespace paddle {
@@ -103,10 +106,12 @@ LayerPtr Layer::create(const LayerConfig& config) {
     return LayerPtr(new MultiClassCrossEntropy(config));
   else if (type == "rank-cost")
     return LayerPtr(new RankingCost(config));
+#ifndef PADDLE_MOBILE_INFERENCE
   else if (type == "auc-validation")
     return LayerPtr(new AucValidation(config));
   else if (type == "pnpair-validation")
     return LayerPtr(new PnpairValidation(config));
+#endif
   return LayerPtr(registrar_.createByType(config.type(), config));
 }
...
@@ -15,7 +15,7 @@ limitations under the License. */
 #include "PoolLayer.h"
 #include "PoolProjectionLayer.h"
 #include "paddle/utils/Logging.h"
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 #include "CudnnPoolLayer.h"
 #endif
 namespace paddle {
@@ -53,7 +53,7 @@ Layer* PoolLayer::create(const LayerConfig& config) {
   const std::string& pool = config.inputs(0).pool_conf().pool_type();
   if (pool == "max-projection" || pool == "avg-projection") {
     return new PoolProjectionLayer(config);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   } else if (CudnnPoolLayer::typeCheck(pool)) {
     return new CudnnPoolLayer(config);
 #endif
...
 # gserver package unittests
+if(NOT MOBILE_INFERENCE)
 ################### test_ProtoDataProvider ############
 add_unittest_without_exec(test_ProtoDataProvider
     test_ProtoDataProvider.cpp)
 # test_ProtoDataProvider will mkdir as same name,
 # so if WORKING_DIRECTORY is default directory, then
 # mkdir will get error.
 add_test(NAME test_ProtoDataProvider
     COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoDataProvider
     WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
+endif()
 ################# test_LayerGrad #######################
 add_unittest_without_exec(test_LayerGrad
@@ -98,9 +100,11 @@ add_unittest_without_exec(test_KmaxSeqScore
 add_test(NAME test_KmaxSeqScore
     COMMAND test_KmaxSeqScore)
+if(NOT MOBILE_INFERENCE)
 ################## test_Evaluator #######################
 add_unittest(test_Evaluator
     test_Evaluator.cpp)
+endif()
 ################ test_LinearChainCRF ####################
 add_simple_unittest(test_LinearChainCRF)
@@ -131,27 +135,31 @@ if(NOT WITH_DOUBLE)
     WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
 endif()
+if(NOT MOBILE_INFERENCE)
 ############### test_RecurrentGradientMachine ###############
 # TODO(yuyang18): There is some bug in test_RecurrentGradientMachine
 # I will fix it.
 add_unittest_without_exec(test_RecurrentGradientMachine
     test_RecurrentGradientMachine.cpp)
 add_test(NAME test_RecurrentGradientMachine
     COMMAND .set_python_path.sh -d
     ${PADDLE_SOURCE_DIR}/python:${PADDLE_SOURCE_DIR}/paddle/gserver/tests
     ${CMAKE_CURRENT_BINARY_DIR}/test_RecurrentGradientMachine
     WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
+endif()
-add_unittest_without_exec(test_NetworkCompare
-    test_NetworkCompare.cpp)
-if(WITH_GPU)
-  add_test(NAME test_NetworkCompare
-    COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=true
-    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
-else()
-  add_test(NAME test_NetworkCompare
-    COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=false
-    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
+if(NOT MOBILE_INFERENCE)
+  add_unittest_without_exec(test_NetworkCompare
+      test_NetworkCompare.cpp)
+  if(WITH_GPU)
+    add_test(NAME test_NetworkCompare
+      COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=true
+      WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
+  else()
+    add_test(NAME test_NetworkCompare
+      COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=false
+      WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle)
+  endif()
 endif()
...
@@ -674,7 +674,7 @@ void testLayerGradKernel(TestConfig testConf,
                          bool useGpu,
                          bool useWeight,
                          float epsilon) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
   if (useGpu) return;
 #endif
   FLAGS_use_gpu = useGpu;
...
@@ -15,7 +15,6 @@ limitations under the License. */
 #pragma once
 #include "ModelConfig.pb.h"
 #include "paddle/gserver/layers/DataLayer.h"
-#include "paddle/trainer/Trainer.h"
 #include "paddle/testing/TestUtil.h"
 using namespace std;  // NOLINT
...
@@ -17,7 +17,6 @@ limitations under the License. */
 #include <vector>
 #include "ModelConfig.pb.h"
 #include "paddle/gserver/layers/DataLayer.h"
-#include "paddle/trainer/Trainer.h"
 #include "LayerGradUtil.h"
 #include "paddle/testing/TestUtil.h"
...
@@ -17,7 +17,6 @@ limitations under the License. */
 #include <vector>
 #include "ModelConfig.pb.h"
 #include "paddle/gserver/layers/DataLayer.h"
-#include "paddle/trainer/Trainer.h"
 #include "paddle/utils/GlobalConstants.h"
 #include "LayerGradUtil.h"
@@ -119,7 +118,7 @@ TEST(Layer, batchNorm) {
   CHECK_EQ(static_cast<int>(convLayer->getOutputValue()->getWidth()), 576);
 }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 void batchNormInference(int n, int c, int h, int w) {
   MatrixPtr input = std::make_shared<GpuMatrix>(n, c * h * w);
   MatrixPtr cudnnOut = std::make_shared<GpuMatrix>(n, c * h * w);
...
@@ -16,7 +16,6 @@ limitations under the License. */
 #include "ModelConfig.pb.h"
 #include "paddle/gserver/layers/DataLayer.h"
 #include "paddle/gserver/layers/LinearChainCRF.h"
-#include "paddle/trainer/Trainer.h"
 #include "LayerGradUtil.h"
 #include "paddle/testing/TestUtil.h"
...
@@ -18,7 +18,6 @@ limitations under the License. */
 #include "ModelConfig.pb.h"
 #include "paddle/gserver/layers/DataLayer.h"
 #include "paddle/math/MathUtils.h"
-#include "paddle/trainer/Trainer.h"
 #include "paddle/utils/GlobalConstants.h"
 #include "LayerGradUtil.h"
...
@@ -18,7 +18,6 @@ limitations under the License. */
 #include "ModelConfig.pb.h"
 #include "paddle/gserver/layers/DataLayer.h"
 #include "paddle/math/MathUtils.h"
-#include "paddle/trainer/Trainer.h"
 #include "paddle/utils/GlobalConstants.h"
 #include "LayerGradUtil.h"
@@ -117,7 +116,7 @@ MatrixPtr doOneConvTest(size_t imgSize,
 }
 TEST(Layer, convParaUnified) {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   MatrixPtr input, resultCpu, resultGpu;
   /// TEST1 for conv ///
...
@@ -18,7 +18,6 @@ limitations under the License. */
 #include <gtest/gtest.h>
 #include "ModelConfig.pb.h"
 #include "paddle/gserver/layers/DataLayer.h"
-#include "paddle/trainer/Trainer.h"
 #include "LayerGradUtil.h"
 #include "paddle/testing/TestUtil.h"
...
@@ -150,7 +150,7 @@ TEST(Layer, detectionOutputLayerFwd) {
               useGpu,
               result2);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   // GPU case 1.
   useGpu = true;
   inputLoc = Matrix::create(1, 16, false, useGpu);
...
@@ -51,7 +51,7 @@ void testEvaluator(TestConfig testConf,
                    string testEvaluatorName,
                    size_t batchSize,
                    bool useGpu) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
   if (useGpu) return;
 #endif
   FLAGS_use_gpu = useGpu;
...
@@ -18,7 +18,6 @@ limitations under the License. */
 #include <vector>
 #include "ModelConfig.pb.h"
 #include "paddle/gserver/layers/DataLayer.h"
-#include "paddle/trainer/Trainer.h"
 #include "paddle/utils/GlobalConstants.h"
 #include "LayerGradUtil.h"
@@ -97,7 +96,7 @@ TEST(Layer, kmaxSeqScoreLayer) {
       Matrix::create(subSeqStartPosition.back(), 1, false, false);
   std::vector<bool> mode = {false};
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   mode.push_back(true);
 #endif
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 #include <cudnn.h>
 #endif
 #include <gtest/gtest.h>
@@ -21,7 +21,6 @@ limitations under the License. */
 #include "ModelConfig.pb.h"
 #include "paddle/gserver/layers/DataLayer.h"
 #include "paddle/math/MathUtils.h"
-#include "paddle/trainer/Trainer.h"
 #include "LayerGradUtil.h"
 #include "paddle/testing/TestUtil.h"
@@ -258,7 +257,7 @@ void testProjectionConv(size_t groups, bool isDeconv) {
       true);
 }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 TEST(Projection, conv) {
   /// test ConvProjection
   testProjectionConv(1, false);
@@ -422,7 +421,7 @@ TEST(Layer, depthwiseConvLayer) {
   // 'depthwise_conv' is a special case of 'exconv' whose
   // groups size equals to the input channels size.
   testDepthwiseConvLayer("exconv", /* useGpu= */ false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   testDepthwiseConvLayer("exconv", /* useGpu= */ true);
 #endif
 }
@@ -480,7 +479,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
 TEST(Layer, convLayer) {
   testConvLayer("exconv", /* trans= */ false, /* useGpu= */ false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   testConvLayer("exconv", /* trans= */ false, /* useGpu= */ true);
   testConvLayer("cudnn_conv", /* trans= */ false, /* useGpu= */ true);
 #endif
@@ -525,7 +524,7 @@ TEST(Layer, convTransLayer) {
   for (auto useGpu : {false, true}) {
     testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu);
   }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   testConvTransLayer("cudnn_convt", /* trans= */ false, /* useGpu= */ true);
 #endif
 }
@@ -638,7 +637,7 @@ TEST(Layer, SelectiveFullyConnectedLayer) {
                 /* trans= */ false,
                 /* useGpu= */ false,
                 false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   testLayerGrad(config,
                 "selective_fc",
                 100,
@@ -1210,7 +1209,7 @@ void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
   testLayerGrad(config, "pool", 100, trans, useGpu);
 }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 void testPoolLayer2(const string& poolType, bool trans, bool useGpu) {
   TestConfig config;
   config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0});
@@ -1236,7 +1235,7 @@ TEST(Layer, PoolLayer) {
   testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false);
   testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true);
   testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ true);
   testPoolLayer("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true);
@@ -1309,7 +1308,7 @@ void testPool3DLayer(const string& poolType, bool trans, bool useGpu) {
 TEST(Layer, Pool3DLayer) {
   testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ false);
   testPool3DLayer("max", /* trans= */ false, /* useGpu= */ false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   testPool3DLayer("avg", /* trans= */ false, /* useGpu= */ true);
   testPool3DLayer("max", /* trans= */ false, /* useGpu= */ true);
 #endif
@@ -1695,7 +1694,7 @@ void testBatchNormLayer(const string& type, bool trans, bool useGpu) {
 TEST(Layer, BatchNormalizationLayer) {
   testBatchNormLayer("batch_norm", false, false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   testBatchNormLayer("batch_norm", false, true);
   if (hl_get_cudnn_lib_version() >= int(4000)) {
     testBatchNormLayer("cudnn_batch_norm", false, true);
@@ -1744,7 +1743,7 @@ void testBatchNorm3DLayer(const string& type, bool trans, bool useGpu) {
 TEST(Layer, testBatchNorm3DLayer) {
   testBatchNorm3DLayer("batch_norm", false, false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   testBatchNorm3DLayer("batch_norm", false, true);
   if (hl_get_cudnn_lib_version() >= int(4000)) {
     testBatchNorm3DLayer("cudnn_batch_norm", false, true);
@@ -2262,7 +2261,7 @@ void test3DConvLayer(const string& type, bool trans, bool useGpu) {
 TEST(Layer, test3DConvLayer) {
   test3DConvLayer("conv3d", /* trans= */ false, /* useGpu= */ false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   test3DConvLayer("conv3d", /* trans= */ false, /* useGpu= */ true);
 #endif
 }
@@ -2339,7 +2338,7 @@ void test3DDeConvLayer(const string& type, bool trans, bool useGpu) {
 TEST(Layer, test3DDeConvLayer) {
   test3DDeConvLayer("deconv3d", /* trans= */ false, /* useGpu= */ false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   test3DDeConvLayer("deconv3d", /* trans= */ false, /* useGpu= */ true);
 #endif
 }
...
@@ -243,7 +243,7 @@ TEST(Compare, concat_slice) {
   compareNetwork(config_file_a, config_file_b);
 }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 TEST(Compare, img_pool) {
   std::string config_file_a = "./gserver/tests/img_pool_a.conf";
   std::string config_file_b = "./gserver/tests/img_pool_b.conf";
...
@@ -151,7 +151,7 @@ TEST(Layer, priorBoxLayerFwd) {
             useGpu,
             result);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   // reset the input parameters
   variance[1] = 0.1;
   variance[3] = 0.2;
...
@@ -485,7 +485,7 @@ TEST(ProtoDataProvider, test) {
           // Currently in async mode, useGpu is not supported
           continue;
         }
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
         if (useGpu) {
           continue;
         }
@@ -525,7 +525,7 @@ TEST(ProtoDataProvider, constant_slots) {
   for (int numConstantSlots : {1, 2}) {
     for (int useGpu : numTwoArray) {
      for (int dataCompression : numTwoArray) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
        if (useGpu) {
          continue;
        }
@@ -708,7 +708,7 @@ TEST(ProtoSequenceDataProvider, test) {
          // Currently in async mode, useGpu is not supported
          continue;
        }
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
        if (useGpu) {
          continue;
        }
...
@@ -37,7 +37,7 @@ TEST(PyDataProvider, py_fill_slots) {
   config.clear_files();
   std::string dataFile = "gserver/tests/pyDataProvider/pyDataProviderList";
   config.set_files(dataFile);
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
   bool useGpu = false;
 #else
   bool useGpu = true;
@@ -71,7 +71,7 @@ TEST(PyDataProvider, py_fill_nest_slots) {
   std::string dataFile = "gserver/tests/pyDataProvider/pyDataProviderList";
   config.set_files(dataFile);
   EXPECT_EQ(config.IsInitialized(), true);
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
   bool useGpu = false;
 #else
   bool useGpu = true;
...
@@ -24,7 +24,6 @@ limitations under the License. */
 #include "paddle/gserver/layers/Layer.h"
 #include "paddle/gserver/layers/SelectiveFullyConnectedLayer.h"
 #include "paddle/math/CpuSparseMatrix.h"
-#include "paddle/trainer/Trainer.h"
 using namespace paddle;  // NOLINT
 using namespace std;     // NOLINT
@@ -321,7 +320,7 @@ TEST(Layer, SelectiveFcLayer_train_dense_mul) {
       "filelist=gserver/tests/SelectiveFcTest/dense_mul_list";
   for (auto useGpu : {false, true}) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
     if (useGpu) {
      break;
    }
@@ -388,7 +387,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
                 outMatSelfc->getWidth(),
                 outMatSelfc->getElementCnt()));
   cpuOutMatSelfc->copyFrom(*outMatSelfc, HPPL_STREAM_DEFAULT);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   if (useGpu) {
     hl_stream_synchronize(HPPL_STREAM_DEFAULT);
   }
@@ -418,7 +417,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config,
   MatrixPtr cpuOutMatFc(
       new CpuMatrix(outMatFc->getHeight(), outMatFc->getWidth()));
   cpuOutMatFc->copyFrom(*outMatFc, HPPL_STREAM_DEFAULT);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   if (useGpu) {
     hl_stream_synchronize(HPPL_STREAM_DEFAULT);
   }
@@ -443,7 +442,7 @@ TEST(Layer, SelectiveFcLayer_train_sparse_mul) {
   selLayerConfig.set_size(fcLayerWidth);
   testSelectiveFcLayerTrainSparseMul(selLayerConfig, false);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   testSelectiveFcLayerTrainSparseMul(selLayerConfig, true);
 #endif
 }
...
@@ -15,7 +15,6 @@ limitations under the License. */
 #include <gtest/gtest.h>
 #include "ModelConfig.pb.h"
 #include "paddle/gserver/layers/DataLayer.h"
-#include "paddle/trainer/Trainer.h"
 #include "LayerGradUtil.h"
 #include "paddle/testing/TestUtil.h"
@@ -195,7 +194,7 @@ TEST(Layer, SeqSliceLayer) {
   vector<vector<real>> ends;
   std::vector<bool> mode = {false};
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   mode.push_back(true);
 #endif
   genSeqInfo(seqStartPos, subSeqStartPos);
...
@@ -199,7 +199,7 @@ TEST(Layer, WarpCTCLayer) {
   for (auto batchSize : {1, 10, 32}) {
     for (auto normByTimes : {false, true}) {
       for (auto useGpu : {false, true}) {
-#ifndef PADDLE_WITH_GPU
+#ifndef PADDLE_WITH_CUDA
         if (useGpu) continue;
 #endif
         LOG(INFO) << "layerSize=" << layerSize << " batchSize=" << batchSize
...
@@ -670,7 +670,7 @@ void GpuMatrix::leftMul(Matrix& a, real scaleAB, real scaleT) {
 }
 void GpuMatrix::selectRows(Matrix& table, IVector& ids) {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   CHECK(dynamic_cast<GpuMatrix*>(&table));
   CHECK(table.useGpu());
   CHECK(ids.useGpu());
@@ -694,7 +694,7 @@ void GpuMatrix::selectRows(Matrix& table, IVector& ids) {
 }
 void GpuMatrix::addToRows(Matrix& table, IVector& ids) {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   CHECK(dynamic_cast<GpuMatrix*>(&table));
   CHECK(table.useGpu());
   CHECK(ids.useGpu());
@@ -741,7 +741,7 @@ void GpuMatrix::rowMax(Matrix& max) {
 }
 void GpuMatrix::rowMax(IVector& maxIds, Matrix& maxVal) {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   CHECK(maxIds.useGpu() && maxVal.useGpu()) << "Matrix type are not equal";
   size_t numSamples = getHeight();
   size_t beam = maxVal.getWidth();
...
@@ -836,7 +836,7 @@ void GpuSparseMatrix::zeroMem() {
 }
 void GpuSparseMatrix::rowMax(IVector& maxIds, Matrix& maxVal) {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   CHECK(maxIds.useGpu() && maxVal.useGpu()) << "Matrix type are not equal";
   size_t numSamples = getHeight();
   size_t beam = maxVal.getWidth();
...
@@ -172,7 +172,7 @@ void GpuVectorT<T>::isEqualTo(const VectorT<T>& b, const T& value) {
 template <class T>
 void GpuVectorT<T>::selectFrom(const VectorT<T>& src, const VectorT<int>& ids) {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   hl_vector_select_from<T>(this->getData(),
                            this->getSize(),
                            src.getData(),
@@ -850,7 +850,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
                                 size_t size)
     : sync_(nullptr) {
   CHECK_LE(offset + size, static_cast<size_t>(src.getSize()));
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   SyncedFlag* flag = src.getSync();
   if (*flag == DATA_AT_CPU) {
     src.copyToGpu();  // will set synchronous data between CPU and GPU
@@ -861,7 +861,7 @@ CpuGpuVectorT<T>::CpuGpuVectorT(CpuGpuVectorT<T>& src,
   auto cMemHandle = (src.getVector(false))->getMemoryHandle();
   cpuVectorT_ = std::make_shared<CpuVectorT<T>>(
       size, std::dynamic_pointer_cast<CpuMemoryHandle>(cMemHandle), offset);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   auto gMemHandle = (src.getVector(true))->getMemoryHandle();
   gpuVectorT_ = std::make_shared<GpuVectorT<T>>(
       size, std::dynamic_pointer_cast<GpuMemoryHandle>(gMemHandle), offset);
...
@@ -68,7 +68,7 @@ void testPoolAllocator() {
 TEST(Allocator, Pool) {
   testPoolAllocator<CpuAllocator>();
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   testPoolAllocator<GpuAllocator>();
 #endif
 }
@@ -92,7 +92,7 @@ TEST(MemoryHandle, Cpu) {
   EXPECT_EQ(ptr1, ptr2);
 }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 TEST(MemoryHandle, Gpu) {
   int numGpu = hl_get_device_count();
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 /**
  * This test file uses autotest::AutoCompare and cmpWithoutArg to compare the
  * implementation of CPU and GPU member function in
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 #include <gtest/gtest.h>
 #include "paddle/math/Vector.h"
...
@@ -94,7 +94,7 @@ void testWrapper(F&& f) {
   }
 }
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 TEST(ExecViaCpu, test1) {
   testWrapper(f);
   testWrapper(&f);
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 #include <gtest/gtest.h>
 #include "paddle/math/Matrix.h"
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 /**
  * This test file uses autotest::AutoCompare and cmpWithArg to compare the
  * implementation of CPU and GPU member function in Matrix.cpp.
...
@@ -47,7 +47,7 @@ struct MatrixPara {
   SparseFormat format;
 };
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
 void test_sparse_matrix_mul(MatrixPara paraA,
                             MatrixPara paraB,
                             MatrixPara paraC) {
@@ -452,7 +452,7 @@ TEST(Matrix, SparseMatrixCSRFormatTrimFrom) {
   matB->trimFrom(*mat);
   checkSMatrixEqual2(matA, matB);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   GpuSparseMatrixPtr matC = std::make_shared<GpuSparseMatrix>(
       height, trimedWidth, height, FLOAT_VALUE, SPARSE_CSR, true);
   matC->trimFrom(*mat);
@@ -546,7 +546,7 @@ TEST(Matrix, SparseMatrixCSCFormatTrimFrom) {
   matB->trimFrom(*mat);
   checkSMatrixEqual2(matA, matB);
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   GpuSparseMatrixPtr matC = std::make_shared<GpuSparseMatrix>(
       height, trimedWidth, height, FLOAT_VALUE, SPARSE_CSC, true);
   matC->trimFrom(*mat);
...
@@ -91,7 +91,7 @@ int VectorCheckErr(const VectorPtr& vector1, const VectorPtr& vector2) {
 typedef std::function<void(size_t size, bool useGpu)> testMatrixFunc;
 void testCase(testMatrixFunc matrixFunc) {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
   for (auto useGpu : {false, true}) {
 #else
   for (auto useGpu : {false}) {
...
...@@ -17,7 +17,7 @@ limitations under the License. */ ...@@ -17,7 +17,7 @@ limitations under the License. */
using namespace paddle; // NOLINT using namespace paddle; // NOLINT
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
TEST(MatrixBatchTransTest, test_batch_matrix_transpose) { TEST(MatrixBatchTransTest, test_batch_matrix_transpose) {
const int nx = 100; const int nx = 100;
const int ny = 50; const int ny = 50;
......
...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
/// This unittest checks that GpuMatrix/CpuMatrix produce the same result, /// This unittest checks that GpuMatrix/CpuMatrix produce the same result,
/// so it is disabled in the CPU-only build. /// so it is disabled in the CPU-only build.
......
...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <gtest/gtest.h> #include <gtest/gtest.h>
......
...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
/// This unittest checks that GpuSparseMatrix/CpuSparseMatrix produce the same /// This unittest checks that GpuSparseMatrix/CpuSparseMatrix produce the same
/// result, so it is disabled in the CPU-only build. /// result, so it is disabled in the CPU-only build.
......
...@@ -175,7 +175,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) { ...@@ -175,7 +175,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
} }
BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() { BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
if (system_allocator_->UseGpu()) { if (system_allocator_->UseGpu()) {
if ((total_used_ + total_free_) == 0) { if ((total_used_ + total_free_) == 0) {
// Compute the maximum allocation size for the first allocation. // Compute the maximum allocation size for the first allocation.
......
...@@ -62,7 +62,7 @@ void CPUAllocator::Free(void* p, size_t size, size_t index) { ...@@ -62,7 +62,7 @@ void CPUAllocator::Free(void* p, size_t size, size_t index) {
bool CPUAllocator::UseGpu() const { return false; } bool CPUAllocator::UseGpu() const { return false; }
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
void* GPUAllocator::Alloc(size_t& index, size_t size) { void* GPUAllocator::Alloc(size_t& index, size_t size) {
// CUDA documentation doesn't explain if cudaMalloc returns nullptr // CUDA documentation doesn't explain if cudaMalloc returns nullptr
......
...@@ -40,7 +40,7 @@ class CPUAllocator : public SystemAllocator { ...@@ -40,7 +40,7 @@ class CPUAllocator : public SystemAllocator {
virtual bool UseGpu() const; virtual bool UseGpu() const;
}; };
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
class GPUAllocator : public SystemAllocator { class GPUAllocator : public SystemAllocator {
public: public:
virtual void* Alloc(size_t& index, size_t size); virtual void* Alloc(size_t& index, size_t size);
......
...@@ -56,7 +56,7 @@ TEST(CPUAllocator, LockMem) { ...@@ -56,7 +56,7 @@ TEST(CPUAllocator, LockMem) {
TestAllocator(a, 0); TestAllocator(a, 0);
} }
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
TEST(GPUAllocator, Alloc) { TEST(GPUAllocator, Alloc) {
paddle::memory::detail::GPUAllocator a; paddle::memory::detail::GPUAllocator a;
TestAllocator(a, 2048); TestAllocator(a, 2048);
......
...@@ -26,7 +26,7 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst, ...@@ -26,7 +26,7 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
std::memcpy(dst, src, num); std::memcpy(dst, src, num);
} }
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
template <> template <>
void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace dst_place, void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace dst_place,
void* dst, void* dst,
......
...@@ -33,7 +33,7 @@ namespace memory { ...@@ -33,7 +33,7 @@ namespace memory {
template <typename DstPlace, typename SrcPlace> template <typename DstPlace, typename SrcPlace>
void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num); void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num);
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
/** /**
* \brief Copy memory from one place to another place. * \brief Copy memory from one place to another place.
......
...@@ -62,7 +62,7 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) { ...@@ -62,7 +62,7 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
return GetCPUBuddyAllocator()->Used(); return GetCPUBuddyAllocator()->Used();
} }
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
using BuddyAllocVec = std::vector<BuddyAllocator*>; using BuddyAllocVec = std::vector<BuddyAllocator*>;
......
...@@ -80,7 +80,7 @@ TEST(BuddyAllocator, CPUMultAlloc) { ...@@ -80,7 +80,7 @@ TEST(BuddyAllocator, CPUMultAlloc) {
} }
} }
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
size_t align(size_t size, paddle::platform::GPUPlace place) { size_t align(size_t size, paddle::platform::GPUPlace place) {
size += sizeof(paddle::memory::detail::Metadata); size += sizeof(paddle::memory::detail::Metadata);
......
...@@ -22,7 +22,7 @@ class AccuracyOp : public framework::OperatorWithKernel { ...@@ -22,7 +22,7 @@ class AccuracyOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Inference"), PADDLE_ENFORCE(ctx->HasInput("Inference"),
"Input(Inference) of AccuracyOp should not be null."); "Input(Inference) of AccuracyOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Label"), PADDLE_ENFORCE(ctx->HasInput("Label"),
......
...@@ -22,7 +22,7 @@ class ActivationOp : public framework::OperatorWithKernel { ...@@ -22,7 +22,7 @@ class ActivationOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
ctx->SetOutputDim("Y", ctx->GetInputDim("X")); ctx->SetOutputDim("Y", ctx->GetInputDim("X"));
ctx->ShareLoD("X", /*->*/ "Y"); ctx->ShareLoD("X", /*->*/ "Y");
} }
...@@ -33,7 +33,7 @@ class ActivationOpGrad : public framework::OperatorWithKernel { ...@@ -33,7 +33,7 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("Y")); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("Y"));
} }
}; };
...@@ -69,6 +69,22 @@ class ReluOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -69,6 +69,22 @@ class ReluOpMaker : public framework::OpProtoAndCheckerMaker {
} }
}; };
template <typename AttrType>
class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
public:
LeakyReluOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of LeakyRelu operator");
AddOutput("Y", "Output of LeakyRelu operator");
AddComment(
"LeakyRelu activation operator, "
"leaky_relu = max(x, alpha * x)");
AddAttr<AttrType>("alpha", "The small negative slope")
.SetDefault(static_cast<AttrType>(0.02f));
}
};
class TanhOpMaker : public framework::OpProtoAndCheckerMaker { class TanhOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
TanhOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) TanhOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
...@@ -81,6 +97,17 @@ class TanhOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -81,6 +97,17 @@ class TanhOpMaker : public framework::OpProtoAndCheckerMaker {
} }
}; };
class TanhShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
public:
TanhShrinkOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of TanhShrink operator");
AddOutput("Y", "Output of TanhShrink operator");
AddComment("TanhShrink activation operator, tanhshrink(x) = x - tanh(x)");
}
};
class SqrtOpMaker : public framework::OpProtoAndCheckerMaker { class SqrtOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
SqrtOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) SqrtOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
...@@ -174,6 +201,19 @@ class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -174,6 +201,19 @@ class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker {
} }
}; };
template <typename AttrType>
class Relu6OpMaker : public framework::OpProtoAndCheckerMaker {
public:
Relu6OpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of Relu6 operator");
AddOutput("Y", "Output of Relu6 operator");
AddComment("Relu6 activation operator, relu6 = min(max(0, x), 6)");
AddAttr<AttrType>("threshold", "The threshold value of Relu6")
.SetDefault(static_cast<AttrType>(6));
}
};
template <typename AttrType> template <typename AttrType>
class PowOpMaker : public framework::OpProtoAndCheckerMaker { class PowOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
...@@ -219,6 +259,9 @@ REGISTER_OP(relu, ops::ActivationOp, ops::ReluOpMaker, relu_grad, ...@@ -219,6 +259,9 @@ REGISTER_OP(relu, ops::ActivationOp, ops::ReluOpMaker, relu_grad,
REGISTER_OP(tanh, ops::ActivationOp, ops::TanhOpMaker, tanh_grad, REGISTER_OP(tanh, ops::ActivationOp, ops::TanhOpMaker, tanh_grad,
ops::ActivationOpGrad); ops::ActivationOpGrad);
REGISTER_OP(tanh_shrink, ops::ActivationOp, ops::TanhShrinkOpMaker,
tanh_shrink_grad, ops::ActivationOpGrad);
REGISTER_OP(sqrt, ops::ActivationOp, ops::SqrtOpMaker, sqrt_grad, REGISTER_OP(sqrt, ops::ActivationOp, ops::SqrtOpMaker, sqrt_grad,
ops::ActivationOpGrad); ops::ActivationOpGrad);
...@@ -240,9 +283,15 @@ REGISTER_OP(softsign, ops::ActivationOp, ops::SoftsignOpMaker, softsign_grad, ...@@ -240,9 +283,15 @@ REGISTER_OP(softsign, ops::ActivationOp, ops::SoftsignOpMaker, softsign_grad,
REGISTER_OP(brelu, ops::ActivationOp, ops::BReluOpMaker<float>, brelu_grad, REGISTER_OP(brelu, ops::ActivationOp, ops::BReluOpMaker<float>, brelu_grad,
ops::ActivationOpGrad); ops::ActivationOpGrad);
REGISTER_OP(leaky_relu, ops::ActivationOp, ops::LeakyReluOpMaker<float>,
leaky_relu_grad, ops::ActivationOpGrad);
REGISTER_OP(soft_relu, ops::ActivationOp, ops::SoftReluOpMaker<float>, REGISTER_OP(soft_relu, ops::ActivationOp, ops::SoftReluOpMaker<float>,
soft_relu_grad, ops::ActivationOpGrad); soft_relu_grad, ops::ActivationOpGrad);
REGISTER_OP(relu6, ops::ActivationOp, ops::Relu6OpMaker<float>, relu6_grad,
ops::ActivationOpGrad);
REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker<float>, pow_grad, REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker<float>, pow_grad,
ops::ActivationOpGrad); ops::ActivationOpGrad);
...@@ -252,11 +301,9 @@ REGISTER_OP(stanh, ops::ActivationOp, ops::STanhOpMaker<float>, stanh_grad, ...@@ -252,11 +301,9 @@ REGISTER_OP(stanh, ops::ActivationOp, ops::STanhOpMaker<float>, stanh_grad,
#define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \ #define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_CPU_KERNEL( \ REGISTER_OP_CPU_KERNEL( \
act_type, \ act_type, \
paddle::operators::ActivationKernel<paddle::platform::CPUPlace, \ ops::ActivationKernel<paddle::platform::CPUPlace, ops::functor<float>>); \
paddle::operators::functor<float>>); \
REGISTER_OP_CPU_KERNEL(act_type##_grad, \ REGISTER_OP_CPU_KERNEL(act_type##_grad, \
paddle::operators::ActivationGradKernel< \ ops::ActivationGradKernel<paddle::platform::CPUPlace, \
paddle::platform::CPUPlace, \ ops::grad_functor<float>>);
paddle::operators::grad_functor<float>>);
FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_CPU_KERNEL); FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_CPU_KERNEL);
...@@ -15,14 +15,14 @@ ...@@ -15,14 +15,14 @@
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/operators/activation_op.h" #include "paddle/operators/activation_op.h"
namespace ops = paddle::operators;
#define REGISTER_ACTIVATION_GPU_KERNEL(act_type, functor, grad_functor) \ #define REGISTER_ACTIVATION_GPU_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_GPU_KERNEL( \ REGISTER_OP_GPU_KERNEL( \
act_type, \ act_type, \
paddle::operators::ActivationKernel<paddle::platform::GPUPlace, \ ops::ActivationKernel<paddle::platform::GPUPlace, ops::functor<float>>); \
paddle::operators::functor<float>>); \
REGISTER_OP_GPU_KERNEL(act_type##_grad, \ REGISTER_OP_GPU_KERNEL(act_type##_grad, \
paddle::operators::ActivationGradKernel< \ ops::ActivationGradKernel<paddle::platform::GPUPlace, \
paddle::platform::GPUPlace, \ ops::grad_functor<float>>);
paddle::operators::grad_functor<float>>);
FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_GPU_KERNEL); FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_GPU_KERNEL);
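Both registration macros above are driven by FOR_EACH_KERNEL_FUNCTOR, an X-macro that applies a registration macro to every (op name, functor, gradient functor) triple in the activation list. A minimal standalone sketch of the same X-macro idea, with hypothetical names rather than the framework's actual list:

#include <cstdio>

// Hypothetical activation list: each entry expands __macro once.
#define FOR_EACH_ACTIVATION(__macro) \
  __macro(relu);                     \
  __macro(tanh);                     \
  __macro(sigmoid)

// Stand-in for the real registration macro: just prints the name.
#define PRINT_ACTIVATION(name) std::printf("registering %s\n", #name)

int main() {
  FOR_EACH_ACTIVATION(PRINT_ACTIVATION);  // one line expands to three calls
  return 0;
}

Adding a new activation, as relu6, leaky_relu, and tanh_shrink are added in this diff, then means appending one entry to the list instead of editing every registration site.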
...@@ -146,6 +146,24 @@ struct TanhGradFunctor : public BaseActivationFunctor<T> { ...@@ -146,6 +146,24 @@ struct TanhGradFunctor : public BaseActivationFunctor<T> {
} }
}; };
// tanhshrink(x) = x - tanh(x)
// where tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
template <typename T>
struct TanhShrinkFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Y>
void operator()(Device d, X x, Y y) const {
y.device(d) = x - x.tanh();
}
};
template <typename T>
struct TanhShrinkGradFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) const {
dx.device(d) = dy * (x.tanh() * x.tanh());
}
};
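The gradient above follows from d/dx (x - tanh(x)) = 1 - sech^2(x) = tanh^2(x). A small self-contained check of that identity against a central finite difference (an illustration, not part of the diff):

#include <cmath>
#include <cstdio>

// tanhshrink(x) = x - tanh(x)
static double tanhshrink(double x) { return x - std::tanh(x); }

int main() {
  const double x = 0.7, h = 1e-6;
  // Analytic gradient used by TanhShrinkGradFunctor: tanh(x)^2.
  const double analytic = std::tanh(x) * std::tanh(x);
  // Independent check via a central finite difference.
  const double numeric = (tanhshrink(x + h) - tanhshrink(x - h)) / (2 * h);
  std::printf("analytic=%.8f  numeric=%.8f\n", analytic, numeric);
  return 0;
}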
// sqrt(x) = x^(1/2) // sqrt(x) = x^(1/2)
template <typename T> template <typename T>
struct SqrtFunctor : public BaseActivationFunctor<T> { struct SqrtFunctor : public BaseActivationFunctor<T> {
...@@ -262,6 +280,36 @@ struct BReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -262,6 +280,36 @@ struct BReluGradFunctor : public BaseActivationFunctor<T> {
} }
}; };
// relu6(x) = min(max(0, x), 6)
template <typename T>
struct Relu6Functor : public BaseActivationFunctor<T> {
float threshold;
// NOTE: This GetAttrs deliberately hides `BaseActivationFunctor<T>::GetAttrs`;
// static dispatch is used here instead of virtual polymorphism for speed.
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device, typename X, typename Y>
void operator()(Device d, X x, Y y) const {
y.device(d) = x.cwiseMax(static_cast<T>(0)).cwiseMin(threshold);
}
};
template <typename T>
struct Relu6GradFunctor : public BaseActivationFunctor<T> {
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) const {
dx.device(d) =
dy * ((x > static_cast<T>(0)) * (x < threshold)).template cast<T>();
}
};
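In the backward pass the gradient flows only where the input was neither clipped below at 0 nor above at the threshold, which is what the (x > 0) * (x < threshold) mask expresses. A scalar sketch of the same forward/backward logic (illustrative only, not part of the diff):

#include <algorithm>
#include <cstdio>

// relu6(x) = min(max(0, x), 6)
static float relu6(float x, float threshold = 6.0f) {
  return std::min(std::max(x, 0.0f), threshold);
}

// Gradient mask used above: nonzero only on the open interval (0, threshold).
static float relu6_grad(float x, float dy, float threshold = 6.0f) {
  return (x > 0.0f && x < threshold) ? dy : 0.0f;
}

int main() {
  const float xs[] = {-1.0f, 3.0f, 7.0f};
  for (float x : xs)
    std::printf("x=%4.1f  y=%4.1f  dx=%4.1f\n", x, relu6(x), relu6_grad(x, 1.0f));
  return 0;
}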
// softsign(x) = x / (1 + |x|) // softsign(x) = x / (1 + |x|)
template <typename T> template <typename T>
struct SoftsignFunctor : public BaseActivationFunctor<T> { struct SoftsignFunctor : public BaseActivationFunctor<T> {
...@@ -309,6 +357,33 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -309,6 +357,33 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
} }
}; };
template <typename T>
struct LeakyReluFunctor : public BaseActivationFunctor<T> {
float alpha;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"alpha", &alpha}};
}
template <typename Device, typename X, typename Y>
void operator()(Device d, X x, Y y) const {
y.device(d) = x.cwiseMax(alpha * x);
}
};
template <typename T>
struct LeakyReluGradFunctor : public BaseActivationFunctor<T> {
float alpha;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"alpha", &alpha}};
}
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) const {
auto temp1 = alpha * (x < static_cast<T>(0)).template cast<T>().eval();
auto temp2 = (x >= static_cast<T>(0)).template cast<T>().eval();
dx.device(d) = dy * (temp1 + temp2).template cast<T>();
}
};
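The two masks temp1 and temp2 encode the piecewise derivative (alpha for x < 0, 1 for x >= 0) as Eigen expressions. The equivalent scalar logic, assuming a slope 0 < alpha < 1 so that max(x, alpha * x) is the usual leaky ReLU:

#include <algorithm>
#include <cstdio>

// leaky_relu(x) = max(x, alpha * x)
static float leaky_relu(float x, float alpha) { return std::max(x, alpha * x); }

// Piecewise derivative matching LeakyReluGradFunctor:
// alpha where x < 0, and 1 where x >= 0.
static float leaky_relu_grad(float x, float dy, float alpha) {
  return dy * (x < 0.0f ? alpha : 1.0f);
}

int main() {
  const float alpha = 0.02f;  // the operator's default slope
  const float xs[] = {-5.0f, 2.0f};
  for (float x : xs)
    std::printf("x=%5.1f  y=%6.2f  dx=%5.2f\n", x, leaky_relu(x, alpha),
                leaky_relu_grad(x, 1.0f, alpha));
  return 0;
}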
template <typename T> template <typename T>
struct PowFunctor : public BaseActivationFunctor<T> { struct PowFunctor : public BaseActivationFunctor<T> {
float factor; float factor;
...@@ -379,4 +454,7 @@ struct STanhGradFunctor : public BaseActivationFunctor<T> { ...@@ -379,4 +454,7 @@ struct STanhGradFunctor : public BaseActivationFunctor<T> {
__macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor); \ __macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(pow, PowFunctor, PowGradFunctor); \ __macro(pow, PowFunctor, PowGradFunctor); \
__macro(stanh, STanhFunctor, STanhGradFunctor); \ __macro(stanh, STanhFunctor, STanhGradFunctor); \
__macro(softsign, SoftsignFunctor, SoftsignGradFunctor) __macro(softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6Functor, Relu6GradFunctor); \
__macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor); \
__macro(tanh_shrink, TanhShrinkFunctor, TanhShrinkGradFunctor)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/adadelta_op.h"
namespace paddle {
namespace operators {
class AdadeltaOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Param"),
"Input(Param) of AdadeltaOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Grad"),
"Input(Grad) of AdadeltaOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("AvgSquaredGrad"),
"Input(AvgSquaredGrad) of AdadeltaOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("AvgSquaredUpdate"),
"Input(AvgSquaredUpdate) of AdadeltaOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
"Output(ParamOut) of AdadeltaOp should not be null.");
PADDLE_ENFORCE(
ctx->HasOutput("AvgSquaredGradOut"),
"Output(AvgSquaredGradOut) of AdadeltaOp should not be null.");
PADDLE_ENFORCE(
ctx->HasOutput("AvgSquaredUpdateOut"),
"Output(AvgSquaredUpdateOut) of AdadeltaOp should not be null.");
auto param_dim = ctx->GetInputDim("Param");
PADDLE_ENFORCE_EQ(
param_dim, ctx->GetInputDim("Grad"),
"param and grad input of AdadeltaOp should have same dimension");
PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("AvgSquaredGrad"),
"Param and AvgSquaredGrad input of AdadeltaOp "
"should have same dimension");
PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("AvgSquaredUpdate"),
"Param and AvgSquaredUpdate input of AdadeltaOp "
"should have same dimension");
ctx->SetOutputDim("ParamOut", param_dim);
ctx->SetOutputDim("AvgSquaredGradOut", param_dim);
ctx->SetOutputDim("AvgSquaredUpdateOut", param_dim);
}
};
class AdadeltaOpMaker : public framework::OpProtoAndCheckerMaker {
public:
AdadeltaOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Param", "(Tensor) Input parameter");
AddInput("Grad", "(Tensor) Input gradient");
AddInput("AvgSquaredGrad",
"(Tensor) Input expectation of squared gradient");
AddInput("AvgSquaredUpdate",
"(Tensor) Input expectation of squared parameter updates");
AddOutput("ParamOut", "(Tensor) Output parameter");
AddOutput("AvgSquaredGradOut",
"(Tensor) Output expectation of squared gradient");
AddOutput("AvgSquaredUpdateOut",
"(Tensor) Output expectation of squared parameter updates");
AddAttr<float>("rho",
"(float, default 0.95) Exponential decay rate "
"for squared gradients.")
.SetDefault(0.95f);
AddAttr<float>("epsilon",
"(float, default 1.0e-6) Constant for "
"numerical stability")
.SetDefault(1.0e-6f);
AddComment(R"DOC(
Adadelta Updates Operator.
This implements the Adadelta optimizer[1]. Adadelta is a per-dimension
adaptive learning rate method for gradient descent.
Adadelta updates:
avg_squared_grad_out = rho * avg_squared_grad + (1 - rho) * grad * grad
param_update = - sqrt((avg_squared_update + epsilon) /
(avg_squared_grad_out + epsilon)) * grad
avg_squared_update_out = rho * avg_squared_update + (1 - rho) * param_update**2
param_out = param + param_update
References:
[1] ADADELTA: An Adaptive Learning Rate Method
https://arxiv.org/abs/1212.5701
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(adadelta, ops::AdadeltaOp, ops::AdadeltaOpMaker);
REGISTER_OP_CPU_KERNEL(
adadelta, ops::AdadeltaOpKernel<paddle::platform::CPUPlace, float>);
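For intuition, a minimal scalar transcription of the Adadelta update rules quoted in the operator comment above, with hypothetical input values; the real kernel applies the same expressions element-wise through Eigen:

#include <cmath>
#include <cstdio>

int main() {
  // One Adadelta step on a single scalar parameter.
  const float rho = 0.95f, epsilon = 1.0e-6f;
  float param = 1.0f, grad = 0.5f;                 // hypothetical values
  float avg_sq_grad = 0.0f, avg_sq_update = 0.0f;  // accumulators start at 0

  float avg_sq_grad_out = rho * avg_sq_grad + (1 - rho) * grad * grad;
  float update = -std::sqrt((avg_sq_update + epsilon) /
                            (avg_sq_grad_out + epsilon)) * grad;
  float avg_sq_update_out = rho * avg_sq_update + (1 - rho) * update * update;
  float param_out = param + update;

  std::printf("param_out=%.6f update=%.6f avg_sq_update_out=%.8f\n",
              param_out, update, avg_sq_update_out);
  return 0;
}

Note that the ratio of the two running averages gives each dimension its own effective step size, which is why the operator takes no global learning rate input.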
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/adadelta_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
adadelta, ops::AdadeltaOpKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class AdadeltaOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
auto avg_squared_grad_out_tensor =
ctx.Output<framework::Tensor>("AvgSquaredGradOut");
auto avg_squared_update_out_tensor =
ctx.Output<framework::Tensor>("AvgSquaredUpdateOut");
param_out_tensor->mutable_data<T>(ctx.GetPlace());
avg_squared_grad_out_tensor->mutable_data<T>(ctx.GetPlace());
avg_squared_update_out_tensor->mutable_data<T>(ctx.GetPlace());
float rho = ctx.Attr<float>("rho");
float epsilon = ctx.Attr<float>("epsilon");
auto param = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Param"));
auto grad = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Grad"));
// Squared gradient accumulator
auto avg_squared_grad = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("AvgSquaredGrad"));
// Squared updates accumulator
auto avg_squared_update = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("AvgSquaredUpdate"));
auto param_out = framework::EigenVector<T>::Flatten(*param_out_tensor);
auto avg_squared_grad_out =
framework::EigenVector<T>::Flatten(*avg_squared_grad_out_tensor);
auto avg_squared_update_out =
framework::EigenVector<T>::Flatten(*avg_squared_update_out_tensor);
auto place = ctx.GetEigenDevice<Place>();
avg_squared_grad_out.device(place) =
rho * avg_squared_grad + (1 - rho) * grad.square();
auto update =
-((avg_squared_update + epsilon) / (avg_squared_grad_out + epsilon))
.sqrt() *
grad;
avg_squared_update_out.device(place) =
rho * avg_squared_update + (1 - rho) * update.square();
param_out.device(place) = param + update;
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/adagrad_op.h"
namespace paddle {
namespace operators {
class AdagradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Param"),
"Input(Param) of AdagradOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Grad"),
"Input(Grad) of AdagradOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Moment"),
"Input(Moment) of AdagradOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("LearningRate"),
"Input(LearningRate) of AdagradOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
"Output(ParamOut) of AdagradOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("MomentOut"),
"Output(MomentOut) of AdagradOp should not be null.");
auto lr_dims = ctx->GetInputDim("LearningRate");
PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
"LearningRate should have one element");
auto param_dims = ctx->GetInputDim("Param");
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Grad"),
"Param and Grad input of AdagradOp should have the same dimension.");
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Moment"),
"Param and Moment input of AdagradOp should have the same dimension.");
ctx->SetOutputDim("ParamOut", param_dims);
ctx->SetOutputDim("MomentOut", param_dims);
}
};
class AdagradOpMaker : public framework::OpProtoAndCheckerMaker {
public:
AdagradOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Param", "(Tensor) Input parameter");
AddInput("Grad", "(Tensor) Input gradient");
AddInput("Moment", "(Tensor) Second moment");
AddInput("LearningRate", "(Tensor) Learning rate");
AddOutput("ParamOut", "(Tensor) Output parameter");
AddOutput("MomentOut", "(Tensor) Output second moment");
AddAttr<float>("epsilon",
"(float, default 1.0e-6) "
"Constant for numerical stability")
.SetDefault(1.0e-6f);
AddComment(R"DOC(
Adaptive Gradient Algorithm (Adagrad).
moment_out = moment + grad * grad
param_out = param - learning_rate * grad / (sqrt(moment_out) + epsilon)
The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
does not have the epsilon attribute. It is added here for numerical stability
by avoiding division by zero.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(adagrad, ops::AdagradOp, ops::AdagradOpMaker);
REGISTER_OP_CPU_KERNEL(adagrad,
ops::AdagradOpKernel<paddle::platform::CPUPlace, float>);
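A scalar transcription of the Adagrad update above (hypothetical values): because the squared-gradient accumulator only grows, the effective step size shrinks monotonically over time.

#include <cmath>
#include <cstdio>

int main() {
  const float lr = 0.1f, epsilon = 1.0e-6f;
  float param = 1.0f, moment = 0.0f;
  for (int step = 0; step < 3; ++step) {
    const float grad = 0.5f;  // hypothetical constant gradient
    moment += grad * grad;    // moment_out = moment + grad * grad
    // param_out = param - lr * grad / (sqrt(moment_out) + epsilon)
    param -= lr * grad / (std::sqrt(moment) + epsilon);
    std::printf("step %d: param=%.6f moment=%.6f\n", step, param, moment);
  }
  return 0;
}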
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/adagrad_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(adagrad,
ops::AdagradOpKernel<paddle::platform::GPUPlace, float>);
...@@ -13,16 +13,43 @@ See the License for the specific language governing permissions and ...@@ -13,16 +13,43 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_desc.h" #include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace framework { namespace operators {
OperatorBase* BuildGradOp(const OperatorBase* op); template <typename Place, typename T>
class AdagradOpKernel : public framework::OpKernel<T> {
void CompleteGradOpDesc(const OpDescBind* forw_op, OpDescBind* grad_op); public:
void Compute(const framework::ExecutionContext& ctx) const override {
} // namespace framework auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
auto moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");
param_out_tensor->mutable_data<T>(ctx.GetPlace());
moment_out_tensor->mutable_data<T>(ctx.GetPlace());
float epsilon = ctx.Attr<float>("epsilon");
auto param = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Param"));
auto grad = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Grad"));
auto moment = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Moment"));
auto lr = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("LearningRate"));
auto param_out = framework::EigenVector<T>::Flatten(*param_out_tensor);
auto moment_out = framework::EigenVector<T>::Flatten(*moment_out_tensor);
auto place = ctx.GetEigenDevice<Place>();
moment_out.device(place) = moment + grad * grad;
Eigen::DSizes<int, 1> m_dsize(moment_out_tensor->numel());
param_out.device(place) =
param - lr.broadcast(m_dsize) * grad / (moment_out.sqrt() + epsilon);
}
};
} // namespace operators
} // namespace paddle } // namespace paddle
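One detail in the kernel above: LearningRate holds a single element, so it is broadcast to the parameter's length before the element-wise multiply. A small sketch of that broadcast, assuming Eigen's unsupported Tensor module (which the framework's EigenVector wrappers build on) is available:

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 1> lr(1);    // one-element learning-rate tensor
  lr.setConstant(0.1f);
  Eigen::Tensor<float, 1> grad(4);  // four-element gradient
  grad.setConstant(2.0f);

  // Repeat the single element 4 times so the shapes match element-wise,
  // just like lr.broadcast(m_dsize) in the kernel.
  Eigen::DSizes<int, 1> bcast(4);
  Eigen::Tensor<float, 1> step = lr.broadcast(bcast) * grad;
  for (int i = 0; i < 4; ++i) std::cout << step(i) << " ";  // prints 0.2 x4
  std::cout << "\n";
  return 0;
}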
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/adamax_op.h"
namespace paddle {
namespace operators {
class AdamaxOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Param"),
"Input(Param) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Grad"),
"Input(Grad) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Moment"),
"Input(Moment) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("InfNorm"),
"Input(InfNorm) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("LearningRate"),
"Input(LearningRate) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Beta1Pow"),
"Input(Beta1Pow) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
"Output(ParamOut) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("MomentOut"),
"Output(MomentOut) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("InfNormOut"),
"Output(InfNormOut) of AdamaxOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Beta1PowOut"),
"Output(Beta1PowOut) of AdamaxOp should not be null.");
auto lr_dims = ctx->GetInputDim("LearningRate");
PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
"Learning rate should have 1 dimension");
auto beta1_pow_dims = ctx->GetInputDim("Beta1Pow");
PADDLE_ENFORCE_EQ(framework::product(beta1_pow_dims), 1,
"Beta1 power accumulator should have 1 dimension");
auto param_dims = ctx->GetInputDim("Param");
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Grad"),
"Param and Grad input of AdamaxOp should have same dimension");
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Moment"),
"Param and Moment input of AdamaxOp should have same dimension");
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("InfNorm"),
"Param and InfNorm input of AdamaxOp should have same dimension");
ctx->SetOutputDim("ParamOut", param_dims);
ctx->SetOutputDim("MomentOut", param_dims);
ctx->SetOutputDim("InfNormOut", param_dims);
ctx->SetOutputDim("Beta1PowOut", beta1_pow_dims);
}
};
class AdamaxOpMaker : public framework::OpProtoAndCheckerMaker {
public:
AdamaxOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Param", "(Tensor) Input parameter");
AddInput("Grad", "(Tensor) Input gradient");
AddInput("LearningRate", "(Tensor) Learning rate");
AddInput("Moment", "(Tensor) First moment");
AddInput("InfNorm",
"(Tensor) "
"Input exponentially weighted infinity norm");
AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator");
AddOutput("ParamOut", "(Tensor) Output parameter");
AddOutput("MomentOut", "(Tensor) Output first moment");
AddOutput("InfNormOut",
"(Tensor) "
"Output exponentially weighted infinity norm");
AddOutput("Beta1PowOut", "(Tensor) Output beta1 power accumulator");
AddAttr<float>("beta1",
"(float, default 0.9) "
"Exponential decay rate for the "
"1st moment estimates.")
.SetDefault(0.9f);
AddAttr<float>("beta2",
"(float, default 0.999) "
"exponential decay rate for the weighted "
"infinity norm estimates.")
.SetDefault(0.999f);
AddAttr<float>("epsilon",
"(float, default 1.0e-8) "
"Constant for numerical stability")
.SetDefault(1.0e-8f);
AddComment(R"DOC(
Adamax Updates Operator.
This implements the Adamax optimizer from Section 7 of the Adam
paper[1]. Adamax is a variant of the
Adam algorithm based on the infinity norm.
Adamax updates:
moment_out = beta1 * moment + (1 - beta1) * grad
inf_norm_out = max(beta2 * inf_norm + epsilon, abs(grad))
beta1_pow_out = beta1_pow * beta1
learning_rate_t = learning_rate/(1 - beta1_pow_out)
param_out = param - learning_rate_t * moment_out/inf_norm_out
The original paper does not have an epsilon attribute.
However, it is added here for numerical stability
by preventing division by zero.
References:
[1] Adam: A Method for Stochastic Optimization
(https://arxiv.org/abs/1412.6980)
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(adamax, ops::AdamaxOp, ops::AdamaxOpMaker);
REGISTER_OP_CPU_KERNEL(adamax,
ops::AdamaxOpKernel<paddle::platform::CPUPlace, float>);
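A scalar transcription of the Adamax update quoted above (hypothetical values). The 1/(1 - beta1_pow) factor is the usual Adam-style bias correction: the first-moment estimate starts at zero, so early steps are scaled up to compensate.

#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const float lr = 0.01f, beta1 = 0.9f, beta2 = 0.999f, eps = 1.0e-8f;
  float param = 1.0f, grad = 0.5f;
  float moment = 0.0f, inf_norm = 0.0f, beta1_pow = beta1;

  float moment_out = beta1 * moment + (1 - beta1) * grad;
  float inf_norm_out = std::max(beta2 * inf_norm + eps, std::fabs(grad));
  float beta1_pow_out = beta1_pow * beta1;
  // Bias-corrected step size, as in the kernel: lr / (1 - beta1_pow_out).
  float lr_t = lr / (1 - beta1_pow_out);
  float param_out = param - lr_t * moment_out / inf_norm_out;

  std::printf("param_out=%.6f lr_t=%.6f\n", param_out, lr_t);
  return 0;
}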
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/adamax_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(adamax,
ops::AdamaxOpKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class AdamaxOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
auto moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");
auto inf_norm_out_tensor = ctx.Output<framework::Tensor>("InfNormOut");
auto beta1_pow_out_tensor = ctx.Output<framework::Tensor>("Beta1PowOut");
param_out_tensor->mutable_data<T>(ctx.GetPlace());
moment_out_tensor->mutable_data<T>(ctx.GetPlace());
inf_norm_out_tensor->mutable_data<T>(ctx.GetPlace());
beta1_pow_out_tensor->mutable_data<T>(ctx.GetPlace());
float beta1 = ctx.Attr<float>("beta1");
float beta2 = ctx.Attr<float>("beta2");
float epsilon = ctx.Attr<float>("epsilon");
auto param = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Param"));
auto grad = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Grad"));
auto moment = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Moment"));
auto inf_norm = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("InfNorm"));
auto lr = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("LearningRate"));
auto beta1_pow = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Beta1Pow"));
auto param_out = framework::EigenVector<T>::Flatten(*param_out_tensor);
auto moment_out = framework::EigenVector<T>::Flatten(*moment_out_tensor);
auto inf_norm_out =
framework::EigenVector<T>::Flatten(*inf_norm_out_tensor);
auto beta1_pow_out =
framework::EigenVector<T>::Flatten(*beta1_pow_out_tensor);
auto place = ctx.GetEigenDevice<Place>();
moment_out.device(place) = beta1 * moment + (1 - beta1) * grad;
inf_norm_out.device(place) =
grad.abs().cwiseMax((beta2 * inf_norm) + epsilon);
beta1_pow_out.device(place) = beta1_pow * beta1;
auto lr_t = lr / (1 - beta1_pow_out);
Eigen::DSizes<int, 1> m_dsize(moment_out_tensor->numel());
param_out.device(place) =
param - lr_t.broadcast(m_dsize) * (moment_out / inf_norm_out);
}
};
} // namespace operators
} // namespace paddle
...@@ -22,7 +22,7 @@ class ClipOp : public framework::OperatorWithKernel { ...@@ -22,7 +22,7 @@ class ClipOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of ClipOp should not be null."); "Input(X) of ClipOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
...@@ -61,7 +61,7 @@ class ClipOpGrad : public framework::OperatorWithKernel { ...@@ -61,7 +61,7 @@ class ClipOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null"); "Input(Out@GRAD) should not be null");
......
...@@ -24,7 +24,7 @@ class ConcatOp : public framework::OperatorWithKernel { ...@@ -24,7 +24,7 @@ class ConcatOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE_GE(ctx->Inputs("X").size(), 1UL, PADDLE_ENFORCE_GE(ctx->Inputs("X").size(), 1UL,
"Inputs(X) of ConcatOp should be empty.") "Inputs(X) of ConcatOp should be empty.")
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
...@@ -83,7 +83,7 @@ class ConcatOpGrad : public framework::OperatorWithKernel { ...@@ -83,7 +83,7 @@ class ConcatOpGrad : public framework::OperatorWithKernel {
: OperatorWithKernel(type, inputs, outputs, attrs) {} : OperatorWithKernel(type, inputs, outputs, attrs) {}
protected: protected:
void InferShape(framework::InferShapeContextBase *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X")); ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X"));
} }
}; };
......
...@@ -27,7 +27,7 @@ class Conv2DOp : public framework::OperatorWithKernel { ...@@ -27,7 +27,7 @@ class Conv2DOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Input"), PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(Input) of Conv2DOp should not be null."); "Input(Input) of Conv2DOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Filter"), PADDLE_ENFORCE(ctx->HasInput("Filter"),
...@@ -106,7 +106,7 @@ class Conv2DOpGrad : public framework::OperatorWithKernel { ...@@ -106,7 +106,7 @@ class Conv2DOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
auto in_dims = ctx->GetInputDim("Input"); auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter"); auto filter_dims = ctx->GetInputDim("Filter");
if (ctx->HasOutput(framework::GradVarName("Input"))) { if (ctx->HasOutput(framework::GradVarName("Input"))) {
......
...@@ -24,7 +24,7 @@ class CosSimOp : public framework::OperatorWithKernel { ...@@ -24,7 +24,7 @@ class CosSimOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
// notnull check // notnull check
PADDLE_ENFORCE(ctx->HasInput("X"), PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of CosSimOp should not be null."); "Input(X) of CosSimOp should not be null.");
...@@ -98,7 +98,7 @@ class CosSimOpGrad : public framework::OperatorWithKernel { ...@@ -98,7 +98,7 @@ class CosSimOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
// notnull check // notnull check
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) must not be null."); PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) must not be null.");
......
...@@ -25,7 +25,7 @@ class CropOp : public framework::OperatorWithKernel { ...@@ -25,7 +25,7 @@ class CropOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of CropOp should not be null."); "Input(X) of CropOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
...@@ -115,7 +115,7 @@ class CropOpGrad : public framework::OperatorWithKernel { ...@@ -115,7 +115,7 @@ class CropOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null"); "Input(Out@GRAD) should not be null");
......
...@@ -22,7 +22,7 @@ class CrossEntropyOp : public framework::OperatorWithKernel { ...@@ -22,7 +22,7 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null."); PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null.");
...@@ -60,7 +60,7 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel { ...@@ -60,7 +60,7 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")), PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
......
...@@ -34,7 +34,7 @@ struct StridedMemcpyFunctor<T, 1> { ...@@ -34,7 +34,7 @@ struct StridedMemcpyFunctor<T, 1> {
auto& cpu_place = boost::get<platform::CPUPlace>(place); auto& cpu_place = boost::get<platform::CPUPlace>(place);
memory::Copy(cpu_place, dst, cpu_place, src, sizeof(T) * dst_dim.head); memory::Copy(cpu_place, dst, cpu_place, src, sizeof(T) * dst_dim.head);
} else { } else {
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
auto& gpu_place = boost::get<platform::GPUPlace>(place); auto& gpu_place = boost::get<platform::GPUPlace>(place);
auto& cuda_ctx = auto& cuda_ctx =
reinterpret_cast<const platform::CUDADeviceContext&>(dev_ctx); reinterpret_cast<const platform::CUDADeviceContext&>(dev_ctx);
......
...@@ -24,7 +24,7 @@ class DropoutOp : public framework::OperatorWithKernel { ...@@ -24,7 +24,7 @@ class DropoutOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
PADDLE_ENFORCE_GE(ctx->Attrs().Get<float>("dropout_prob"), 0); PADDLE_ENFORCE_GE(ctx->Attrs().Get<float>("dropout_prob"), 0);
PADDLE_ENFORCE_LE(ctx->Attrs().Get<float>("dropout_prob"), 1); PADDLE_ENFORCE_LE(ctx->Attrs().Get<float>("dropout_prob"), 1);
...@@ -70,7 +70,7 @@ class DropoutOpGrad : public framework::OperatorWithKernel { ...@@ -70,7 +70,7 @@ class DropoutOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE_EQ(ctx->Attrs().Get<bool>("is_training"), 1, PADDLE_ENFORCE_EQ(ctx->Attrs().Get<bool>("is_training"), 1,
"GradOp is only callable when is_training is true"); "GradOp is only callable when is_training is true");
......
...@@ -25,7 +25,7 @@ class ElementwiseOp : public framework::OperatorWithKernel { ...@@ -25,7 +25,7 @@ class ElementwiseOp : public framework::OperatorWithKernel {
protected: protected:
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of elementwise op should not be null"); "Input(X) of elementwise op should not be null");
PADDLE_ENFORCE(ctx->HasInput("Y"), PADDLE_ENFORCE(ctx->HasInput("Y"),
...@@ -106,7 +106,7 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel { ...@@ -106,7 +106,7 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null"); PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
......
...@@ -22,7 +22,7 @@ class FillZerosLikeOp : public framework::OperatorWithKernel { ...@@ -22,7 +22,7 @@ class FillZerosLikeOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of FillZerosLikeOp should not be null."); "Input(X) of FillZerosLikeOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Y"), PADDLE_ENFORCE(ctx->HasOutput("Y"),
......
...@@ -23,7 +23,7 @@ class GatherOp : public framework::OperatorWithKernel { ...@@ -23,7 +23,7 @@ class GatherOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of GatherOp should not be null."); "Input(X) of GatherOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Index"), PADDLE_ENFORCE(ctx->HasInput("Index"),
...@@ -51,7 +51,7 @@ class GatherGradOp : public framework::OperatorWithKernel { ...@@ -51,7 +51,7 @@ class GatherGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
} }
......
@@ -43,7 +43,7 @@ class GaussianRandomOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of GaussianRandomOp should not be null.");
    auto dims = ctx->Attrs().Get<std::vector<int>>("dims");
...
@@ -22,7 +22,7 @@ class LookupTableOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("W"),
                   "Input(W) of LookupTableOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Ids"),
@@ -70,7 +70,7 @@ class LookupTableOpGrad : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    auto table_dims = ctx->GetInputDim("W");
    ctx->SetOutputDim(framework::GradVarName("W"), table_dims);
  }
...
@@ -22,7 +22,7 @@ class LstmUnitOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of LSTM should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("C_prev"),
                   "Input(C_prev) of LSTM should not be null.");
@@ -77,7 +77,7 @@ class LstmUnitGradOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("C")),
                   "Input(C@GRAD) should not be null");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("H")),
...
@@ -71,7 +71,7 @@ void testIm2col() {
    context =
        new paddle::platform::CPUDeviceContext(paddle::platform::CPUPlace());
  } else {
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
    context =
        new paddle::platform::CUDADeviceContext(paddle::platform::GPUPlace());
#else
@@ -116,7 +116,7 @@ void testIm2col() {
TEST(math, im2col) {
  testIm2col<paddle::platform::CPUPlace>();
-#ifdef PADDLE_WITH_GPU
+#ifdef PADDLE_WITH_CUDA
  testIm2col<paddle::platform::GPUPlace>();
#endif
}
#include "paddle/operators/math/math_function.h" #include "paddle/operators/math/math_function.h"
#include "gtest/gtest.h" #include "gtest/gtest.h"
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
TEST(math_function, notrans_mul_trans) { TEST(math_function, notrans_mul_trans) {
paddle::framework::Tensor input1; paddle::framework::Tensor input1;
paddle::framework::Tensor input1_gpu; paddle::framework::Tensor input1_gpu;
......
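Note: PADDLE_WITH_GPU becomes PADDLE_WITH_CUDA in every preprocessor guard in this commit. A sketch of the guard pattern these tests use, assuming the device-context types shown in the testIm2col hunk above:

// Pick a device context at runtime; the CUDA branch only exists in CUDA builds.
paddle::platform::DeviceContext* context = nullptr;
#ifdef PADDLE_WITH_CUDA
context =
    new paddle::platform::CUDADeviceContext(paddle::platform::GPUPlace());
#else
context =
    new paddle::platform::CPUDeviceContext(paddle::platform::CPUPlace());
#endif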
@@ -22,7 +22,7 @@ class MeanOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of MeanOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
@@ -36,7 +36,7 @@ class MeanOpMaker : public framework::OpProtoAndCheckerMaker {
  MeanOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "The input of mean op");
-    AddOutput("Out", "The output of mean op").NotInGradient();
+    AddOutput("Out", "The output of mean op");
    AddComment(R"DOC( Mean Operator
)DOC");
  }
@@ -47,16 +47,32 @@ class MeanGradOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
  }
};

+class MeanGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDescBind> Apply() const override {
+    auto* grad_op = new framework::OpDescBind();
+    grad_op->SetType("mean_grad");
+    grad_op->SetInput("X", Input("X"));
+    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    return std::unique_ptr<framework::OpDescBind>(grad_op);
+  }
+};
+
} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
-REGISTER_OP(mean, ops::MeanOp, ops::MeanOpMaker, mean_grad, ops::MeanGradOp);
+REGISTER_OPERATOR(mean, ops::MeanOp, ops::MeanOpMaker, ops::MeanGradMaker);
+REGISTER_OPERATOR(mean_grad, ops::MeanGradOp);
REGISTER_OP_CPU_KERNEL(mean,
                       ops::MeanKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(mean_grad,
...
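Note: REGISTER_OP paired a forward op with a grad op type and let the framework derive the backward variable bindings (hence the NotInGradient annotations being deleted here); REGISTER_OPERATOR instead takes a grad-op-desc maker that states the wiring explicitly, and the grad op is registered on its own line. A sketch of the pattern for a hypothetical unary op — my_op/my_op_grad and the class names are illustrative, not part of this commit:

class MyOpGradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDescBind> Apply() const override {
    auto* op = new framework::OpDescBind();
    op->SetType("my_op_grad");
    op->SetInput("X", Input("X"));                                   // forward input
    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));  // dOut
    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));      // dX
    return std::unique_ptr<framework::OpDescBind>(op);
  }
};

// REGISTER_OPERATOR(my_op, MyOp, MyOpMaker, MyOpGradMaker);
// REGISTER_OPERATOR(my_op_grad, MyOpGradOp);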
@@ -26,7 +26,7 @@ class MinusOp : public framework::OperatorWithKernel {
      : OperatorWithKernel(type, inputs, outputs, attrs) {}
 protected:
-  void InferShape(framework::InferShapeContextBase *ctx) const override {
+  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of MinusOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Y"),
@@ -49,9 +49,9 @@ class MinusOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  MinusOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "The left tensor of minus operator.").NotInGradient();
-    AddInput("Y", "The right tensor of minus operator.").NotInGradient();
-    AddOutput("Out", "The output tensor of minus operator.").NotInGradient();
+    AddInput("X", "The left tensor of minus operator.");
+    AddInput("Y", "The right tensor of minus operator.");
+    AddOutput("Out", "The output tensor of minus operator.");
    AddComment(R"DOC(Minus Operator
@@ -64,26 +64,35 @@ or not. But the output only shares the LoD with input `X`.
)DOC");
  }
};

-template <typename AttrType>
-class MinusGradOp : public NetOp {
+class MinusGradMaker : public framework::GradOpDescMakerBase {
 public:
-  MinusGradOp(const std::string &type, const framework::VariableNameMap &inputs,
-              const framework::VariableNameMap &outputs,
-              const framework::AttributeMap &attrs)
-      : NetOp(type, inputs, outputs, attrs) {
-    auto out_grad = Input(framework::GradVarName("Out"));
-    auto x_grad = Output(framework::GradVarName("X"));
-    auto y_grad = Output(framework::GradVarName("Y"));
-
-    // x_grad = out_grad
-    AppendOp(framework::OpRegistry::CreateOp("identity", {{"X", {out_grad}}},
-                                             {{"Y", {x_grad}}}, {}));
-
-    framework::AttributeMap scale_attr;
-    scale_attr["scale"] = static_cast<AttrType>(-1);
-    AppendOp(framework::OpRegistry::CreateOp("scale", {{"X", {out_grad}}},
-                                             {{"Out", {y_grad}}}, scale_attr));
-    CompleteAddOp(false);
+  using framework::GradOpDescMakerBase::GradOpDescMakerBase;
+
+  std::vector<std::unique_ptr<framework::OpDescBind>> operator()()
+      const override {
+    std::vector<std::unique_ptr<framework::OpDescBind>> ops;
+    auto x_g = InputGrad("X");
+    if (!x_g.empty()) {
+      auto *x_g_op = new framework::OpDescBind();
+      x_g_op->SetType("scale");
+      x_g_op->SetInput("X", OutputGrad("Out"));
+      x_g_op->SetOutput("Out", x_g);
+      x_g_op->SetAttr("scale", 1.0f);
+      ops.emplace_back(x_g_op);
+    }
+
+    auto y_g = InputGrad("Y");
+    if (!y_g.empty()) {
+      auto *y_g_op = new framework::OpDescBind();
+      y_g_op->SetType("scale");
+      y_g_op->SetInput("X", OutputGrad("Out"));
+      y_g_op->SetOutput("Out", y_g);
+      y_g_op->SetAttr("scale", -1.0f);
+      ops.emplace_back(y_g_op);
+    }
+    return ops;
  }
};
@@ -91,7 +100,6 @@ class MinusGradOp : public NetOp {
} // namespace paddle

namespace ops = paddle::operators;
-REGISTER_OP(minus, ops::MinusOp, ops::MinusOpMaker, minus_grad,
-            ops::MinusGradOp<float>);
+REGISTER_OPERATOR(minus, ops::MinusOp, ops::MinusOpMaker, ops::MinusGradMaker);
REGISTER_OP_CPU_KERNEL(minus,
                       ops::MinusKernel<paddle::platform::CPUPlace, float>);
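Note: the maker encodes the calculus of Out = X - Y directly. Since

    dOut/dX = +1    and    dOut/dY = -1,

the backward pass is X@GRAD = Out@GRAD (a scale op with scale = 1.0, replacing the old identity op) and Y@GRAD = -Out@GRAD (scale = -1.0). A branch is skipped entirely when InputGrad(...) returns empty, i.e. when that gradient was never requested — something the old NetOp-based MinusGradOp could not express.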
@@ -22,7 +22,7 @@ class ModifiedHuberLossOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "X must be initialized.");
    PADDLE_ENFORCE(ctx->HasInput("Y"), "Y must be initialized.");
@@ -74,7 +74,7 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "X must be initialized.");
    PADDLE_ENFORCE(ctx->HasInput("Y"), "Y must be initialized.");
    PADDLE_ENFORCE(ctx->HasInput("IntermediateVal"),
...
@@ -24,7 +24,7 @@ class MulOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of MulOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) of MulOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
@@ -97,7 +97,7 @@ class MulOpGrad : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
...
@@ -24,7 +24,7 @@ class MultiplexOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Ids"), "Input(Ids) shouldn't be null.");
    PADDLE_ENFORCE(!ctx->Inputs("X").empty(),
                   "MultiInput(X) shouldn't be empty.");
@@ -90,7 +90,7 @@ class MultiplexGradOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(!ctx->Inputs("X").empty(), "Input(X) should not be null.");
    PADDLE_ENFORCE(!ctx->Outputs(framework::GradVarName("X")).empty(),
                   "Output(X@Grad) should not be null.");
...
@@ -24,7 +24,7 @@ class PadOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of PadOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of PadOp should not be null.");
@@ -56,8 +56,7 @@ class PadOpMaker : public framework::OpProtoAndCheckerMaker {
             "The input should be a k-D tensor(k > 0 and k < 7)");
    AddOutput("Out",
              "The output of pad op."
-              "A tensor with the same shape as X.")
-        .NotInGradient();
+              "A tensor with the same shape as X.");
    AddComment(R"DOC(
Pad input into output, as specified by paddings and pad_value. The input should be a k-D tensor(k > 0 and k < 7). As an example:
@@ -99,7 +98,7 @@ class PadOpGrad : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) should not be null");
@@ -111,11 +110,29 @@ class PadOpGrad : public framework::OperatorWithKernel {
  }
};

+class PadOpGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDescBind> Apply() const override {
+    auto* bind = new framework::OpDescBind();
+    bind->SetInput("X", Input("X"));
+    bind->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    bind->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    bind->SetAttrMap(Attrs());
+    bind->SetType("pad_grad");
+    return std::unique_ptr<framework::OpDescBind>(bind);
+  }
+};
+
} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;

-REGISTER_OP(pad, ops::PadOp, ops::PadOpMaker, pad_grad, ops::PadOpGrad);
+REGISTER_OPERATOR(pad, ops::PadOp, ops::PadOpMaker, ops::PadOpGradMaker);
+REGISTER_OPERATOR(pad_grad, ops::PadOpGrad);
REGISTER_OP_CPU_KERNEL(pad, ops::PadKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(pad_grad,
                       ops::PadGradKernel<paddle::platform::CPUPlace, float>);
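Note: pad_grad recovers X@GRAD by cropping Out@GRAD back to X's shape using the same paddings, so the generated grad op must see the forward op's attributes; that is what the SetAttrMap(Attrs()) call above carries over. Sketch of the effect (attribute names from the pad op; values illustrative):

// Inside a grad-op maker: copy the forward op's attribute map wholesale,
// e.g. {"paddings": [...], "pad_value": 0.0f} for the pad op above.
bind->SetAttrMap(Attrs());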
@@ -27,7 +27,7 @@ class PoolOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase *ctx) const override {
+  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "X(Input) of Pooling should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
@@ -74,7 +74,7 @@ class PoolOpGrad : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase *ctx) const override {
+  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "X(Input) of Pooling should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
...
@@ -26,7 +26,7 @@ class PReluOp : public framework::OperatorWithKernel {
      : OperatorWithKernel(type, inputs, outputs, attrs) {}
 protected:
-  void InferShape(framework::InferShapeContextBase *ctx) const override {
+  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
    PADDLE_ENFORCE(ctx->HasInput("Alpha"), "Input(Alpha) should not be null");
    PADDLE_ENFORCE(product(ctx->GetInputDim("Alpha")) == 1,
@@ -63,7 +63,7 @@ class PReluGradOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase *ctx) const override {
+  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) should not be null");
...
@@ -25,7 +25,7 @@ class RankLossOp : public framework::OperatorWithKernel {
      : OperatorWithKernel(type, inputs, outputs, attrs) {}
 protected:
-  void InferShape(framework::InferShapeContextBase *ctx) const override {
+  void InferShape(framework::InferShapeContext *ctx) const override {
    // input check
    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) shouldn't be null");
    PADDLE_ENFORCE(ctx->HasInput("Left"), "Input(Left) shouldn't be null");
@@ -90,7 +90,7 @@ class RankLossGradOp : public framework::OperatorWithKernel {
      : OperatorWithKernel(type, inputs, outputs, attrs) {}
 protected:
-  void InferShape(framework::InferShapeContextBase *ctx) const override {
+  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasInput("Left"), "Input(Left) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasInput("Right"), "Input(Right) shouldn't be null.");
...
@@ -24,7 +24,7 @@ class ReduceOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase *ctx) const override {
+  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of ReduceOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
@@ -58,7 +58,7 @@ class ReduceGradOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase *ctx) const override {
+  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) should not be null.");
@@ -168,36 +168,22 @@ namespace ops = paddle::operators;
REGISTER_OP(reduce_sum, ops::ReduceOp, ops::ReduceSumOpMaker, reduce_sum_grad,
            ops::ReduceGradOp);
-REGISTER_OP_CPU_KERNEL(
-    reduce_sum,
-    ops::ReduceKernel<paddle::platform::CPUPlace, float, ops::SumFunctor>);
-REGISTER_OP_CPU_KERNEL(reduce_sum_grad,
-                       ops::ReduceGradKernel<paddle::platform::CPUPlace, float,
-                                             ops::SumGradFunctor>);

REGISTER_OP(reduce_mean, ops::ReduceOp, ops::ReduceMeanOpMaker,
            reduce_mean_grad, ops::ReduceGradOp);
-REGISTER_OP_CPU_KERNEL(
-    reduce_mean,
-    ops::ReduceKernel<paddle::platform::CPUPlace, float, ops::MeanFunctor>);
-REGISTER_OP_CPU_KERNEL(reduce_mean_grad,
-                       ops::ReduceGradKernel<paddle::platform::CPUPlace, float,
-                                             ops::MeanGradFunctor>);

REGISTER_OP(reduce_max, ops::ReduceOp, ops::ReduceMaxOpMaker, reduce_max_grad,
            ops::ReduceGradOp);
-REGISTER_OP_CPU_KERNEL(
-    reduce_max,
-    ops::ReduceKernel<paddle::platform::CPUPlace, float, ops::MaxFunctor>);
-REGISTER_OP_CPU_KERNEL(reduce_max_grad,
-                       ops::ReduceGradKernel<paddle::platform::CPUPlace, float,
-                                             ops::MaxOrMinGradFunctor>);

-REGISTER_OP(reduce_min, ops::ReduceOp, ops::ReduceMaxOpMaker, reduce_min_grad,
+REGISTER_OP(reduce_min, ops::ReduceOp, ops::ReduceMinOpMaker, reduce_min_grad,
            ops::ReduceGradOp);
-REGISTER_OP_CPU_KERNEL(
-    reduce_min,
-    ops::ReduceKernel<paddle::platform::CPUPlace, float, ops::MinFunctor>);
-REGISTER_OP_CPU_KERNEL(reduce_min_grad,
-                       ops::ReduceGradKernel<paddle::platform::CPUPlace, float,
-                                             ops::MaxOrMinGradFunctor>);
+
+#define REGISTER_REDUCE_CPU_KERNEL(reduce_type, functor, grad_functor)     \
+  REGISTER_OP_CPU_KERNEL(                                                  \
+      reduce_type,                                                         \
+      ops::ReduceKernel<paddle::platform::CPUPlace, float, ops::functor>); \
+  REGISTER_OP_CPU_KERNEL(reduce_type##_grad,                               \
+                         ops::ReduceGradKernel<paddle::platform::CPUPlace, \
+                                               float, ops::grad_functor>);
+
+FOR_EACH_KERNEL_FUNCTOR(REGISTER_REDUCE_CPU_KERNEL);
@@ -17,30 +17,12 @@
namespace ops = paddle::operators;

-REGISTER_OP_GPU_KERNEL(
-    reduce_sum,
-    ops::ReduceKernel<paddle::platform::GPUPlace, float, ops::SumFunctor>);
-REGISTER_OP_GPU_KERNEL(reduce_sum_grad,
-                       ops::ReduceGradKernel<paddle::platform::GPUPlace, float,
-                                             ops::SumGradFunctor>);
-
-REGISTER_OP_GPU_KERNEL(
-    reduce_mean,
-    ops::ReduceKernel<paddle::platform::GPUPlace, float, ops::MeanFunctor>);
-REGISTER_OP_GPU_KERNEL(reduce_mean_grad,
-                       ops::ReduceGradKernel<paddle::platform::GPUPlace, float,
-                                             ops::MeanGradFunctor>);
-
-REGISTER_OP_GPU_KERNEL(
-    reduce_max,
-    ops::ReduceKernel<paddle::platform::GPUPlace, float, ops::MaxFunctor>);
-REGISTER_OP_GPU_KERNEL(reduce_max_grad,
-                       ops::ReduceGradKernel<paddle::platform::GPUPlace, float,
-                                             ops::MaxOrMinGradFunctor>);
-
-REGISTER_OP_GPU_KERNEL(
-    reduce_min,
-    ops::ReduceKernel<paddle::platform::GPUPlace, float, ops::MinFunctor>);
-REGISTER_OP_GPU_KERNEL(reduce_min_grad,
-                       ops::ReduceGradKernel<paddle::platform::GPUPlace, float,
-                                             ops::MaxOrMinGradFunctor>);
+#define REGISTER_REDUCE_GPU_KERNEL(reduce_type, functor, grad_functor)     \
+  REGISTER_OP_GPU_KERNEL(                                                  \
+      reduce_type,                                                         \
+      ops::ReduceKernel<paddle::platform::GPUPlace, float, ops::functor>); \
+  REGISTER_OP_GPU_KERNEL(reduce_type##_grad,                               \
+                         ops::ReduceGradKernel<paddle::platform::GPUPlace, \
+                                               float, ops::grad_functor>);
+
+FOR_EACH_KERNEL_FUNCTOR(REGISTER_REDUCE_GPU_KERNEL);
@@ -198,3 +198,9 @@ class ReduceGradKernel : public framework::OpKernel<T> {
} // namespace operators
} // namespace paddle
+
+#define FOR_EACH_KERNEL_FUNCTOR(__macro)                \
+  __macro(reduce_sum, SumFunctor, SumGradFunctor);      \
+  __macro(reduce_mean, MeanFunctor, MeanGradFunctor);   \
+  __macro(reduce_max, MaxFunctor, MaxOrMinGradFunctor); \
+  __macro(reduce_min, MinFunctor, MaxOrMinGradFunctor);
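Note: FOR_EACH_KERNEL_FUNCTOR is an X-macro: the header lists each (op, functor, grad functor) triple once, and the .cc/.cu files pass in a registration macro that gets stamped out per triple. Expanding the CPU macro for the first entry reproduces exactly the hand-written registrations deleted above:

// Expansion of FOR_EACH_KERNEL_FUNCTOR(REGISTER_REDUCE_CPU_KERNEL)
// for the reduce_sum entry:
REGISTER_OP_CPU_KERNEL(
    reduce_sum,
    ops::ReduceKernel<paddle::platform::CPUPlace, float, ops::SumFunctor>);
REGISTER_OP_CPU_KERNEL(reduce_sum_grad,
                       ops::ReduceGradKernel<paddle::platform::CPUPlace, float,
                                             ops::SumGradFunctor>);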
@@ -26,7 +26,7 @@ class ReshapeOp : public framework::OperatorWithKernel {
      : OperatorWithKernel(type, inputs, outputs, attrs) {}
 protected:
-  void InferShape(framework::InferShapeContextBase *ctx) const override {
+  void InferShape(framework::InferShapeContext *ctx) const override {
    // input check
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of ReshapeOp should not be null.");
@@ -94,7 +94,7 @@ class ReshapeGradOp : public framework::OperatorWithKernel {
      : OperatorWithKernel(type, inputs, outputs, attrs) {}
 protected:
-  void InferShape(framework::InferShapeContextBase *ctx) const override {
+  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) shouldn't be null.");
...
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/rmsprop_op.h"
namespace paddle {
namespace operators {
class RmspropOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Param"),
"Input(Param) of RmspropOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("MeanSquare"),
"Input(MeanSquare) of RmspropOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("LearningRate"),
"Input(LearningRate) of RmspropOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Grad"),
"Input(Grad) of RmspropOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Moment"),
"Input(Moment) of RmspropOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
"Output(param_out) of RmspropOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("MomentOut"),
"Output(Momentum_out) of RmspropOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("MeanSquareOut"),
"Output(MeanSquareOut) of RmspropOp should not be null.");
auto param_dim = ctx->GetInputDim("Param");
PADDLE_ENFORCE_EQ(
param_dim, ctx->GetInputDim("Grad"),
"Param and grad input of RmspropOp should have the same dimension.");
PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("Moment"),
"Param and Momentum input of RmspropOp "
"should have the same dimension.");
PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("MeanSquare"),
"Param and Momentum input of RmspropOp "
"should have the same dimension.");
auto lr_dim = ctx->GetInputDim("LearningRate");
PADDLE_ENFORCE_EQ(framework::product(lr_dim), 1,
"Learning Rate should be a scalar.");
ctx->SetOutputDim("ParamOut", param_dim);
ctx->SetOutputDim("MomentOut", param_dim);
ctx->SetOutputDim("MeanSquareOut", param_dim);
}
};
class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
public:
RmspropOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Param",
"(Tensor, default Tensor<float>) "
"Input parameter value that has to be updated");
AddInput("MeanSquare",
"(Tensor, default Tensor<float>)"
" The mean square value that gets updated");
AddInput("LearningRate",
"(Tensor, default Tensor<float>) "
"The learning rate should be a tensor of size 1");
AddInput("Grad",
"(Tensor, default Tensor<float>) "
"Input gradient of the parameter");
AddInput("Moment",
"(Tensor, default Tensor<float>) The moment that gets updated");
AddOutput("ParamOut", "(Tensor) Output updated parameter value");
AddOutput("MomentOut", "(Tensor) Output updated moment");
AddOutput("MeanSquareOut", "(Tensor) Output Mean squared updated value");
AddAttr<float>("epsilon",
"(float, default 1e-10) Constant "
"for numerical stability.")
.SetDefault(1.0e-10f);
AddAttr<float>("decay",
"(float, default 0.9) "
"Discounting factor for coming gradient.")
.SetDefault(0.9f);
AddAttr<float>("momentum", "(float, default 0.0) Constant value")
.SetDefault(0.0f);
AddComment(R"DOC(
RMSprop
MeanSquareOut = decay * MeanSquare + (1 - decay) * Grad * Grad
MomentOut = momentum * Moment +
LearningRate * Grad / sqrt(MeanSquareOut + epsilon)
ParamOut = Param - MomentOut
The original slides that proposed RMSprop: Slide 29 of
http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(rmsprop, ops::RmspropOp, ops::RmspropOpMaker);
REGISTER_OP_CPU_KERNEL(rmsprop,
ops::RmspropOpKernel<paddle::platform::CPUPlace, float>);
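Note: the update rule from the DOC block above, typeset for reference. Writing s for MeanSquare, m for Moment, theta for Param, rho for decay, mu for momentum, eta for LearningRate, and epsilon as in the attribute (all products, divisions, and the square root elementwise):

\begin{aligned}
s_t      &= \rho\, s_{t-1} + (1 - \rho)\, g_t \odot g_t \\
m_t      &= \mu\, m_{t-1} + \eta\, g_t \oslash \sqrt{s_t + \epsilon} \\
\theta_t &= \theta_{t-1} - m_t
\end{aligned}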
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/rmsprop_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(rmsprop,
ops::RmspropOpKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
template <typename Place, typename T>
class RmspropOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* param_out = ctx.Output<Tensor>("ParamOut");
auto* moment_out = ctx.Output<Tensor>("MomentOut");
auto* mean_square_out = ctx.Output<Tensor>("MeanSquareOut");
auto grad = ctx.Input<Tensor>("Grad");
param_out->mutable_data<T>(ctx.GetPlace());
moment_out->mutable_data<T>(ctx.GetPlace());
mean_square_out->mutable_data<T>(ctx.GetPlace());
float epsilon = ctx.Attr<float>("epsilon");
float rho = ctx.Attr<float>("decay");
float momentum = ctx.Attr<float>("momentum");
auto p = EigenVector<T>::Flatten(*ctx.Input<Tensor>("Param"));
auto ms = EigenVector<T>::Flatten(*ctx.Input<Tensor>("MeanSquare"));
auto lr = EigenVector<T>::Flatten(*ctx.Input<Tensor>("LearningRate"));
auto g = EigenVector<T>::Flatten(*grad);
auto mom = EigenVector<T>::Flatten(*ctx.Input<Tensor>("Moment"));
auto p_out = EigenVector<T>::Flatten(*param_out);
auto mom_out = EigenVector<T>::Flatten(*moment_out);
auto ms_out = EigenVector<T>::Flatten(*mean_square_out);
auto place = ctx.GetEigenDevice<Place>();
Eigen::DSizes<int, 1> grad_dsize(grad->numel());
ms_out.device(place) = rho * ms + (1 - rho) * g * g;
mom_out.device(place) =
momentum * mom +
lr.broadcast(grad_dsize) * g / (ms_out + epsilon).sqrt();
p_out.device(place) = p - mom_out;
}
};
} // namespace operators
} // namespace paddle
@@ -26,7 +26,7 @@ class ScaleOp : public framework::OperatorWithKernel {
      : OperatorWithKernel(type, inputs, outputs, attrs) {}
 protected:
-  void InferShape(framework::InferShapeContextBase *ctx) const override {
+  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of ScaleOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
@@ -41,8 +41,8 @@ class ScaleOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  ScaleOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "The input tensor of scale operator.").NotInGradient();
-    AddOutput("Out", "The output tensor of scale operator.").NotInGradient();
+    AddInput("X", "The input tensor of scale operator.");
+    AddOutput("Out", "The output tensor of scale operator.");
    AddComment(R"DOC(Scale operator

The equation is: Out = scale*X
@@ -52,21 +52,18 @@ The equation is: Out = scale*X
  }
};

-// The operator to calculate gradients of a scale operator is just the scale
-// operator itself.
-// Grad(Out=scale(X)) => Grad(X) = scale(Grad(Out))
-template <typename AttrType>
-class ScaleGradOp : public NetOp {
+class ScaleGradMaker : public framework::SingleGradOpDescMaker {
 public:
-  ScaleGradOp(const std::string &type, const framework::VariableNameMap &inputs,
-              const framework::VariableNameMap &outputs,
-              const framework::AttributeMap &attrs)
-      : NetOp(type, inputs, outputs, attrs) {
-    AppendOp(framework::OpRegistry::CreateOp(
-        "scale", {{"X", {Input(framework::GradVarName("Out"))}}},
-        {{"Out", {Output(framework::GradVarName("X"))}}},
-        {{"scale", Attr<AttrType>("scale")}}));
-    CompleteAddOp(false);
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDescBind> Apply() const override {
+    auto *grad_op = new framework::OpDescBind();
+    grad_op->SetType("scale");
+    grad_op->SetInput("X", OutputGrad("Out"));
+    grad_op->SetOutput("Out", InputGrad("X"));
+    grad_op->SetAttr("scale", GetAttr("scale"));
+    return std::unique_ptr<framework::OpDescBind>(grad_op);
  }
};
@@ -75,7 +72,7 @@ class ScaleGradOp : public NetOp {
namespace ops = paddle::operators;

-REGISTER_OP(scale, ops::ScaleOp, ops::ScaleOpMaker<float>, scale_grad,
-            ops::ScaleGradOp<float>);
+REGISTER_OPERATOR(scale, ops::ScaleOp, ops::ScaleOpMaker<float>,
+                  ops::ScaleGradMaker);
REGISTER_OP_CPU_KERNEL(scale,
                       ops::ScaleKernel<paddle::platform::CPUPlace, float>);
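Note: the comment deleted above still holds. For

    Out = scale * X    =>    X@GRAD = scale * Out@GRAD,

the backward pass is simply another scale op reusing the forward op's scale attribute — now declared statically through a maker (SetAttr("scale", GetAttr("scale"))) instead of being assembled at runtime as a NetOp.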
@@ -23,7 +23,7 @@ class ScatterOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Ref"),
                   "Input(Ref) of ScatterOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Index"),
@@ -60,7 +60,7 @@ class ScatterGradOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    ctx->SetOutputDim(framework::GradVarName("Updates"),
                      ctx->GetInputDim("Updates"));
    ctx->SetOutputDim(framework::GradVarName("Ref"), ctx->GetInputDim("Ref"));
...
@@ -22,7 +22,7 @@ class SequencePoolOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of SequencePoolOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
@@ -74,7 +74,7 @@ class SequencePoolGradOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Gradient of Out should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("X"), "The input X should not be null.");
...
@@ -22,7 +22,7 @@ class SequenceSoftmaxOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of SequenceSoftmaxOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
@@ -67,7 +67,7 @@ class SequenceSoftmaxGradOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Out"),
                   "Input(Out) of SequenceSoftmaxGradOp should not be null.");
    PADDLE_ENFORCE(
...
@@ -22,7 +22,7 @@ class SGDOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase *ctx) const override {
+  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Param"),
                   "Input(Param) of SGDOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Grad"),
...
@@ -19,28 +19,25 @@ limitations under the License. */
namespace paddle {
namespace operators {

-using Tensor = framework::Tensor;
-template <typename T, int MajorType = Eigen::RowMajor,
-          typename IndexType = Eigen::DenseIndex>
-using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
-
template <typename Place, typename T>
class SGDOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto param = ctx.Input<Tensor>("Param");
-    auto grad = ctx.Input<Tensor>("Grad");
-    auto param_out = ctx.Output<Tensor>("ParamOut");
-    float lr = ctx.Input<Tensor>("LearningRate")->data<float>()[0];
+    auto param = ctx.Input<framework::Tensor>("Param");
+    auto grad = ctx.Input<framework::Tensor>("Grad");
+    auto param_out = ctx.Output<framework::Tensor>("ParamOut");
+    auto learning_rate = ctx.Input<framework::Tensor>("LearningRate");

    param_out->mutable_data<T>(ctx.GetPlace());

-    auto p = EigenVector<T>::Flatten(*param);
-    auto g = EigenVector<T>::Flatten(*grad);
-    auto o = EigenVector<T>::Flatten(*param_out);
+    auto p = framework::EigenVector<T>::Flatten(*param);
+    auto g = framework::EigenVector<T>::Flatten(*grad);
+    auto o = framework::EigenVector<T>::Flatten(*param_out);
+    auto lr = framework::EigenVector<T>::Flatten(*learning_rate);
    auto place = ctx.GetEigenDevice<Place>();

-    o.device(place) = p - lr * g;
+    Eigen::DSizes<int, 1> grad_dsize(grad->numel());
+    o.device(place) = p - lr.broadcast(grad_dsize) * g;
  }
};
...
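Note: the kernel now consumes LearningRate as a 1-element tensor and broadcasts it across the gradient with Eigen, instead of pulling a scalar out with data<float>()[0] — a read that would risk dereferencing device memory from the host in a GPU build. The update itself is unchanged, elementwise

    theta <- theta - eta * g;

Eigen::DSizes<int, 1> grad_dsize(grad->numel()) together with lr.broadcast(grad_dsize) tiles the size-1 learning-rate vector to the gradient's length so the whole expression evaluates on the kernel's device.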
@@ -24,7 +24,7 @@ class SigmoidCrossEntropyWithLogitsOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
    PADDLE_ENFORCE(ctx->HasInput("Labels"),
                   "Input(Labels) should be not null.");
@@ -53,7 +53,7 @@ class SigmoidCrossEntropyWithLogitsGradOp
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
    PADDLE_ENFORCE(ctx->HasInput("Labels"),
                   "Input(Labels) should be not null.");
...
@@ -22,7 +22,7 @@ class SmoothL1LossOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "X must be initialized.");
    PADDLE_ENFORCE(ctx->HasInput("Y"), "Y must be initialized.");
@@ -94,7 +94,7 @@ class SmoothL1LossGradOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    auto in_dims = ctx->GetInputDim("X");
    auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
...
@@ -22,7 +22,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of SoftmaxOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Y"),
@@ -69,7 +69,7 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  void InferShape(framework::InferShapeContextBase* ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should be not null.");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
                   "Input(Y@GRAD) should be not null.");
...
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "paddle/operators/softmax_with_cross_entropy_op.h" #include "paddle/operators/softmax_with_cross_entropy_op.h"
#include <paddle/function/TensorType.h> #include <paddle/function/TensorType.h>
#include <iostream>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -27,15 +28,14 @@ class SoftmaxWithCrossEntropyOpMaker ...@@ -27,15 +28,14 @@ class SoftmaxWithCrossEntropyOpMaker
AddInput("Logits", AddInput("Logits",
"(Tensor, default: Tensor<float>), The unscaled log probabilities " "(Tensor, default: Tensor<float>), The unscaled log probabilities "
"which is a 2-D tensor with shape [N x K]. N is the batch_size, " "which is a 2-D tensor with shape [N x K]. N is the batch_size, "
"and K is the class number.") "and K is the class number.");
.NotInGradient(); AddInput("Label",
AddInput( "(Tensor, default: Tensor<int>), The ground truth which is a 2-D "
"Label", "tensor. "
"(Tensor, default: Tensor<int>), The ground truth which is a 2-D " "If softLable is set to 0, Label is a Tensor<int> with shape [N x "
"tensor. " "1]. "
"If softLable is set to 0, Label is a Tensor<int> with shape [N x 1]. " "If softLable is set to 1, Label is a Tensor<float/double> "
"If softLable is set to 1, Label is a Tensor<float/double> " "with shape [N x K].");
"with shape [N x K].");
AddOutput( AddOutput(
"Softmax", "Softmax",
"(Tensor, default: Tensor<float>), A 2-D tensor with shape [N x K]. " "(Tensor, default: Tensor<float>), A 2-D tensor with shape [N x K]. "
...@@ -83,7 +83,7 @@ class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { ...@@ -83,7 +83,7 @@ class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Logits"), PADDLE_ENFORCE(ctx->HasInput("Logits"),
"Input(Logits) should be not null."); "Input(Logits) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
...@@ -128,7 +128,7 @@ class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { ...@@ -128,7 +128,7 @@ class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Loss")), PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Loss")),
"Input(Loss@Grad) should not be null."); "Input(Loss@Grad) should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Softmax"), PADDLE_ENFORCE(ctx->HasInput("Softmax"),
...@@ -163,15 +163,34 @@ class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { ...@@ -163,15 +163,34 @@ class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel {
} }
}; };
class SoftmaxGradMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto* grad_op = new framework::OpDescBind();
grad_op->SetType("softmax_with_cross_entropy_grad");
grad_op->SetInput("Label", Input("Label"));
grad_op->SetInput("Softmax", Output("Softmax"));
grad_op->SetInput("Loss", Output("Loss"));
grad_op->SetInput(framework::GradVarName("Softmax"), OutputGrad("Softmax"));
grad_op->SetInput(framework::GradVarName("Loss"), OutputGrad("Loss"));
grad_op->SetOutput(framework::GradVarName("Logits"), InputGrad("Logits"));
grad_op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(grad_op);
}
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP(softmax_with_cross_entropy, ops::SoftmaxWithCrossEntropyOp, REGISTER_OPERATOR(softmax_with_cross_entropy, ops::SoftmaxWithCrossEntropyOp,
ops::SoftmaxWithCrossEntropyOpMaker, ops::SoftmaxWithCrossEntropyOpMaker, ops::SoftmaxGradMaker);
softmax_with_cross_entropy_grad, REGISTER_OPERATOR(softmax_with_cross_entropy_grad,
ops::SoftmaxWithCrossEntropyOpGrad); ops::SoftmaxWithCrossEntropyOpGrad);
REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy, REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy,
ops::SoftmaxWithCrossEntropyKernel<float>); ops::SoftmaxWithCrossEntropyKernel<float>);
REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy_grad, REGISTER_OP_CPU_KERNEL(softmax_with_cross_entropy_grad,
......
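As a reference for the operator registered above, here is a minimal NumPy sketch of the forward computation for the hard-label case (softLabel set to 0); the function name and shapes are illustrative, not part of this diff:
import numpy as np
def softmax_with_cross_entropy(logits, label):
    # logits: [N x K] unscaled log probabilities; label: [N x 1] int class ids
    shifted = logits - logits.max(axis=1, keepdims=True)  # for numerical stability
    softmax = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
    n = logits.shape[0]
    loss = -np.log(softmax[np.arange(n), label.flatten()]).reshape(n, 1)
    return softmax, loss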
...@@ -24,7 +24,7 @@ class SplitOp : public framework::OperatorWithKernel { ...@@ -24,7 +24,7 @@ class SplitOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SplitOp should not be null."); "Input(X) of SplitOp should not be null.");
PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(), 1UL, PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(), 1UL,
......
...@@ -22,7 +22,7 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel { ...@@ -22,7 +22,7 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SquaredL2DistanceOp should not be null."); "Input(X) of SquaredL2DistanceOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Y"), PADDLE_ENFORCE(ctx->HasInput("Y"),
...@@ -86,7 +86,7 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel { ...@@ -86,7 +86,7 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Gradient of Out should not be null"); "Gradient of Out should not be null");
auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
......
...@@ -72,7 +72,7 @@ TEST(StridedMemcpy, CPUConcat) { ...@@ -72,7 +72,7 @@ TEST(StridedMemcpy, CPUConcat) {
} }
} }
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
TEST(StridedMemcpy, GPUCrop) { TEST(StridedMemcpy, GPUCrop) {
// clang-format off // clang-format off
int src[] = { int src[] = {
...@@ -157,4 +157,4 @@ TEST(StridedMemcpy, GPUConcat) { ...@@ -157,4 +157,4 @@ TEST(StridedMemcpy, GPUConcat) {
#endif #endif
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
\ No newline at end of file
...@@ -22,7 +22,7 @@ class SumOp : public framework::OperatorWithKernel { ...@@ -22,7 +22,7 @@ class SumOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInputs("X"), "Inputs(X) should not be null"); PADDLE_ENFORCE(ctx->HasInputs("X"), "Inputs(X) should not be null");
auto x_dims = ctx->GetInputsDim("X"); auto x_dims = ctx->GetInputsDim("X");
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
...@@ -45,10 +45,8 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -45,10 +45,8 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
SumOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) SumOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "the input tensors of sum operator.") AddInput("X", "the input tensors of sum operator.").AsDuplicable();
.AsDuplicable() AddOutput("Out", "the output tensor of sum operator.");
.NotInGradient();
AddOutput("Out", "the output tensor of sum operator.").NotInGradient();
AddComment(R"DOC( AddComment(R"DOC(
Sum the input tensors. Sum the input tensors.
...@@ -58,23 +56,26 @@ or not. But the output only shares the LoD with the first input. ...@@ -58,23 +56,26 @@ or not. But the output only shares the LoD with the first input.
} }
}; };
class SumGradOp : public NetOp { class SumGradMaker : public framework::GradOpDescMakerBase {
public: public:
SumGradOp(const std::string& type, const framework::VariableNameMap& inputs, using framework::GradOpDescMakerBase::GradOpDescMakerBase;
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs)
: NetOp(type, inputs, outputs, attrs) {
auto& x_grad_names = Outputs(framework::GradVarName("X"));
auto out_grad_name = this->Input(framework::GradVarName("Out"));
framework::AttributeMap grad_attrs; std::vector<std::unique_ptr<framework::OpDescBind>> operator()()
grad_attrs["scale"] = 1.0f; const override {
for (auto& x_grad_name : x_grad_names) { auto x_grads = InputGrad("X");
AppendOp(framework::OpRegistry::CreateOp( std::vector<std::unique_ptr<framework::OpDescBind>> grad_ops;
"scale", {{"X", {out_grad_name}}}, {{"Out", {x_grad_name}}}, grad_ops.reserve(x_grads.size());
grad_attrs)); auto og = OutputGrad("Out");
} std::transform(x_grads.begin(), x_grads.end(), std::back_inserter(grad_ops),
CompleteAddOp(false); [&og](const std::string& x_grad) {
auto* grad_op = new framework::OpDescBind();
grad_op->SetType("scale");
grad_op->SetInput("X", og);
grad_op->SetOutput("Out", {x_grad});
grad_op->SetAttr("scale", 1.0f);
return std::unique_ptr<framework::OpDescBind>(grad_op);
});
return grad_ops;
} }
}; };
...@@ -82,5 +83,6 @@ class SumGradOp : public NetOp { ...@@ -82,5 +83,6 @@ class SumGradOp : public NetOp {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP(sum, ops::SumOp, ops::SumOpMaker, sum_grad, ops::SumGradOp);
REGISTER_OPERATOR(sum, ops::SumOp, ops::SumOpMaker, ops::SumGradMaker);
REGISTER_OP_CPU_KERNEL(sum, ops::SumKernel<paddle::platform::CPUPlace, float>); REGISTER_OP_CPU_KERNEL(sum, ops::SumKernel<paddle::platform::CPUPlace, float>);
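A quick illustration of why the SumGradMaker above can emit one plain "scale" op with scale 1.0 per input: the gradient of a sum with respect to each addend is just the upstream output gradient. A NumPy check (all names here are illustrative, not part of the diff):
import numpy as np
x1, x2 = np.random.rand(3, 4), np.random.rand(3, 4)
d_out = np.random.rand(3, 4)   # upstream gradient of Out = x1 + x2
d_x1 = d_out * 1.0             # what one generated "scale" op computes
d_x2 = d_out * 1.0
assert (d_x1 == d_out).all() and (d_x2 == d_out).all()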
...@@ -22,7 +22,7 @@ class TopkOp : public framework::OperatorWithKernel { ...@@ -22,7 +22,7 @@ class TopkOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of TopkOp should not be null."); "Input(X) of TopkOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
......
...@@ -24,7 +24,7 @@ class TransposeOp : public framework::OperatorWithKernel { ...@@ -24,7 +24,7 @@ class TransposeOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null"); PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null");
auto x_dims = ctx->GetInputDim("X"); auto x_dims = ctx->GetInputDim("X");
...@@ -93,7 +93,7 @@ class TransposeOpGrad : public framework::OperatorWithKernel { ...@@ -93,7 +93,7 @@ class TransposeOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null"); "Input(Out@GRAD) should not be null");
......
...@@ -47,7 +47,7 @@ class UniformRandomOp : public framework::OperatorWithKernel { ...@@ -47,7 +47,7 @@ class UniformRandomOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContextBase* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of UniformRandomOp should not be null."); "Output(Out) of UniformRandomOp should not be null.");
......
...@@ -35,7 +35,7 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const { ...@@ -35,7 +35,7 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
Place CPUDeviceContext::GetPlace() const { return CPUPlace(); } Place CPUDeviceContext::GetPlace() const { return CPUPlace(); }
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
template <> template <>
Eigen::GpuDevice* Eigen::GpuDevice*
......
...@@ -14,7 +14,7 @@ limitations under the License. */ ...@@ -14,7 +14,7 @@ limitations under the License. */
#include "paddle/platform/enforce.h" #include "paddle/platform/enforce.h"
#include "paddle/platform/place.h" #include "paddle/platform/place.h"
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
#include "paddle/platform/dynload/cublas.h" #include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h" #include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/gpu_info.h" #include "paddle/platform/gpu_info.h"
...@@ -61,7 +61,7 @@ class CPUDeviceContext : public DeviceContext { ...@@ -61,7 +61,7 @@ class CPUDeviceContext : public DeviceContext {
std::unique_ptr<Eigen::DefaultDevice> eigen_device_; std::unique_ptr<Eigen::DefaultDevice> eigen_device_;
}; };
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
template <> template <>
struct EigenDeviceConverter<platform::GPUPlace> { struct EigenDeviceConverter<platform::GPUPlace> {
using EigenDeviceType = Eigen::GpuDevice; using EigenDeviceType = Eigen::GpuDevice;
......
...@@ -29,7 +29,7 @@ limitations under the License. */ ...@@ -29,7 +29,7 @@ limitations under the License. */
#include <cxxabi.h> // for __cxa_demangle #include <cxxabi.h> // for __cxa_demangle
#endif #endif
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
#include "paddle/platform/dynload/cublas.h" #include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h" #include "paddle/platform/dynload/cudnn.h"
...@@ -113,7 +113,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error( ...@@ -113,7 +113,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
} }
} }
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
template <typename... Args> template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error( inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
......
...@@ -213,4 +213,4 @@ TEST(ENFORCE_USER_DEFINED_CLASS, EQ) { ...@@ -213,4 +213,4 @@ TEST(ENFORCE_USER_DEFINED_CLASS, EQ) {
TEST(ENFORCE_USER_DEFINED_CLASS, NE) { TEST(ENFORCE_USER_DEFINED_CLASS, NE) {
Dims a{{1, 2, 3, 4}}, b{{5, 6, 7, 8}}; Dims a{{1, 2, 3, 4}}, b{{5, 6, 7, 8}};
ASSERT_THROW(PADDLE_ENFORCE_EQ(a, b), paddle::platform::EnforceNotMet); ASSERT_THROW(PADDLE_ENFORCE_EQ(a, b), paddle::platform::EnforceNotMet);
} }
\ No newline at end of file
...@@ -14,7 +14,7 @@ limitations under the License. */ ...@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <stddef.h> #include <stddef.h>
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
#include <boost/config.hpp> #include <boost/config.hpp>
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
// Because boost's variadic templates has bug on nvcc, boost will disable // Because boost's variadic templates has bug on nvcc, boost will disable
// variadic template support when GPU enabled on nvcc. // variadic template support when GPU enabled on nvcc.
......
...@@ -215,7 +215,7 @@ int main(int argc, char** argv) { ...@@ -215,7 +215,7 @@ int main(int argc, char** argv) {
uint64_t dataSize = FLAGS_dim * sizeof(real); uint64_t dataSize = FLAGS_dim * sizeof(real);
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
GpuVector gpuParam(FLAGS_dim); GpuVector gpuParam(FLAGS_dim);
GpuVector gpuGrad(FLAGS_dim); GpuVector gpuGrad(FLAGS_dim);
#else #else
......
...@@ -99,7 +99,7 @@ TEST(ProtoServer, regular) { ...@@ -99,7 +99,7 @@ TEST(ProtoServer, regular) {
} }
TEST(ProtoServer, extended) { TEST(ProtoServer, extended) {
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
ProtoClient* client; ProtoClient* client;
if (FLAGS_rdma_tcp == "rdma") if (FLAGS_rdma_tcp == "rdma")
client = new ProtoClient(FLAGS_server_addr, FLAGS_port, F_RDMA); client = new ProtoClient(FLAGS_server_addr, FLAGS_port, F_RDMA);
......
if(WITH_PYTHON) if(WITH_PYTHON)
cc_library(paddle_pybind SHARED cc_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc SRCS pybind.cc exception.cc protobuf.cc
DEPS pybind python backward proto_desc DEPS pybind python backward proto_desc tensor_array
${GLOB_OP_LIB}) ${GLOB_OP_LIB})
endif(WITH_PYTHON) endif(WITH_PYTHON)
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/framework/backward.h" #include "paddle/framework/backward.h"
#include "paddle/framework/lod_tensor.h" #include "paddle/framework/lod_tensor.h"
#include "paddle/framework/tensor_array.h"
#include "paddle/operators/cond_op.h" #include "paddle/operators/cond_op.h"
#include "paddle/operators/net_op.h" #include "paddle/operators/net_op.h"
#include "paddle/operators/recurrent_op.h" #include "paddle/operators/recurrent_op.h"
...@@ -34,7 +35,7 @@ static size_t UniqueIntegerGenerator() { ...@@ -34,7 +35,7 @@ static size_t UniqueIntegerGenerator() {
} }
bool IsCompileGPU() { bool IsCompileGPU() {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
return false; return false;
#else #else
return true; return true;
...@@ -78,7 +79,7 @@ PYBIND11_PLUGIN(core) { ...@@ -78,7 +79,7 @@ PYBIND11_PLUGIN(core) {
.def("set", PyCPUTensorSetFromArray<float>) .def("set", PyCPUTensorSetFromArray<float>)
.def("set", PyCPUTensorSetFromArray<int>) .def("set", PyCPUTensorSetFromArray<int>)
.def("set", PyCPUTensorSetFromArray<double>) .def("set", PyCPUTensorSetFromArray<double>)
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
.def("set", PyCUDATensorSetFromArray<float>) .def("set", PyCUDATensorSetFromArray<float>)
.def("set", PyCUDATensorSetFromArray<int>) .def("set", PyCUDATensorSetFromArray<int>)
.def("set", PyCUDATensorSetFromArray<double>) .def("set", PyCUDATensorSetFromArray<double>)
...@@ -96,7 +97,7 @@ PYBIND11_PLUGIN(core) { ...@@ -96,7 +97,7 @@ PYBIND11_PLUGIN(core) {
.def( .def(
"__init__", "__init__",
[](LoDTensor &instance, const std::vector<std::vector<size_t>> &lod) { [](LoDTensor &instance, const std::vector<std::vector<size_t>> &lod) {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
new (&instance) LoDTensor(lod); new (&instance) LoDTensor(lod);
#else #else
LoD new_lod; LoD new_lod;
...@@ -107,7 +108,7 @@ PYBIND11_PLUGIN(core) { ...@@ -107,7 +108,7 @@ PYBIND11_PLUGIN(core) {
}) })
.def("set_lod", .def("set_lod",
[](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) { [](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
self.set_lod(lod); self.set_lod(lod);
#else #else
LoD new_lod; LoD new_lod;
...@@ -117,7 +118,7 @@ PYBIND11_PLUGIN(core) { ...@@ -117,7 +118,7 @@ PYBIND11_PLUGIN(core) {
#endif #endif
}) })
.def("lod", [](LoDTensor &self) -> std::vector<std::vector<size_t>> { .def("lod", [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
return self.lod(); return self.lod();
#else #else
auto lod = self.lod(); auto lod = self.lod();
...@@ -203,7 +204,7 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -203,7 +204,7 @@ All parameter, weight, gradient are variables in Paddle.
.def_static("create", .def_static("create",
[](paddle::platform::GPUPlace& place) [](paddle::platform::GPUPlace& place)
-> paddle::platform::DeviceContext* { -> paddle::platform::DeviceContext* {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
PADDLE_THROW("GPUPlace is not supported in CPU device."); PADDLE_THROW("GPUPlace is not supported in CPU device.");
#else #else
return new paddle::platform::CUDADeviceContext(place); return new paddle::platform::CUDADeviceContext(place);
...@@ -230,6 +231,21 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -230,6 +231,21 @@ All parameter, weight, gradient are variables in Paddle.
desc.InitializationErrorString()); desc.InitializationErrorString());
return OpRegistry::CreateOp(desc); return OpRegistry::CreateOp(desc);
}) })
.def_static("infer_shape",
[](OpDescBind &op_desc, BlockDescBind &block) {
auto op = OpRegistry::CreateOp(*op_desc.Proto());
auto *op_with_kernel =
dynamic_cast<OperatorWithKernel *>(op.get());
if (op_with_kernel != nullptr) {
auto ctx = CompileTimeInferShapeContext(op_desc, block);
op_with_kernel->InferShape(&ctx);
} else {
PADDLE_THROW(
"OP(%s) is not type of OperatorWithKernel, "
"should not call this function",
op_desc.Type());
}
})
.def("backward", .def("backward",
[](const OperatorBase &forwardOp, [](const OperatorBase &forwardOp,
const std::unordered_set<std::string> &no_grad_vars) { const std::unordered_set<std::string> &no_grad_vars) {
...@@ -271,6 +287,56 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -271,6 +287,56 @@ All parameter, weight, gradient are variables in Paddle.
self->CompleteAddOp(); self->CompleteAddOp();
}); });
py::class_<framework::TensorArray>(m, "TensorArray")
.def("__init__",
[](TensorArray &instance) { new (&instance) TensorArray(); })
.def("read",
[](TensorArray &self, size_t index) { return self.Read(index); })
.def("write", [](TensorArray &self, size_t index,
LoDTensor &value) { self.Write(index, value); })
.def("write_shared",
[](TensorArray &self, size_t index, const LoDTensor &value) {
self.WriteShared(index, value);
})
.def("size", [](TensorArray &self) { return self.size(); })
.def("pack",
[](TensorArray &self, size_t level,
const std::vector<std::vector<size_t>> &meta_info,
const std::vector<std::vector<size_t>> &lod) {
std::vector<DySeqMeta> meta;
for (auto &info : meta_info) {
PADDLE_ENFORCE_EQ(info.size(), 3UL);
meta.emplace_back(info[0], info[1], info[2]);
}
#ifndef PADDLE_WITH_CUDA
return self.Pack(level, meta, lod);
#else
LoD new_lod;
new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
return self.Pack(level, meta, new_lod);
#endif
})
.def("unpack",
[](TensorArray &self, const LoDTensor &source, int level,
bool length_descend) {
auto metas = self.Unpack(source, level, length_descend);
std::vector<std::vector<size_t>> meta_info;
for (auto meta : metas) {
meta_info.emplace_back(
std::vector<size_t>({meta.begin, meta.end, meta.ori_idx}));
}
return meta_info;
})
.def("stack", [](TensorArray &self) { return self.Stack(); })
.def("unstack",
[](TensorArray &self, const LoDTensor &source) {
return self.Unstack(source);
})
.def("unstack_shared", [](TensorArray &self, const LoDTensor &source) {
return self.UnstackShared(source);
});
// recurrent_op // recurrent_op
py::class_<operators::RecurrentOp, OperatorBase>(m, "RecurrentOp") py::class_<operators::RecurrentOp, OperatorBase>(m, "RecurrentOp")
.def_static( .def_static(
......
...@@ -106,7 +106,7 @@ void PyCPUTensorSetFromArray( ...@@ -106,7 +106,7 @@ void PyCPUTensorSetFromArray(
std::memcpy(dst, array.data(), sizeof(T) * array.size()); std::memcpy(dst, array.data(), sizeof(T) * array.size());
} }
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
template <typename T> template <typename T>
void PyCUDATensorSetFromArray( void PyCUDATensorSetFromArray(
framework::Tensor &self, framework::Tensor &self,
......
...@@ -36,4 +36,4 @@ TEST(to_string, user_defined) { ...@@ -36,4 +36,4 @@ TEST(to_string, user_defined) {
using namespace paddle::string; using namespace paddle::string;
UserDefinedClass instance; UserDefinedClass instance;
ASSERT_EQ(kOutputString, to_string(instance)); ASSERT_EQ(kOutputString, to_string(instance));
} }
\ No newline at end of file
...@@ -29,7 +29,7 @@ int main(int argc, char** argv) { ...@@ -29,7 +29,7 @@ int main(int argc, char** argv) {
initMain(argc, argv); initMain(argc, argv);
initPython(argc, argv); initPython(argc, argv);
string confFile = TrainerConfigHelper::getConfigNameFromPath(FLAGS_model_dir); string confFile = TrainerConfigHelper::getConfigNameFromPath(FLAGS_model_dir);
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
FLAGS_use_gpu = false; FLAGS_use_gpu = false;
#endif #endif
auto config = std::make_shared<TrainerConfigHelper>(confFile); auto config = std::make_shared<TrainerConfigHelper>(confFile);
......
...@@ -146,7 +146,7 @@ void compareGradient(comData& comDataCpu, comData& comDataGpu) { ...@@ -146,7 +146,7 @@ void compareGradient(comData& comDataCpu, comData& comDataGpu) {
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
exit(0); exit(0);
#endif #endif
paddle::initMain(argc, argv); paddle::initMain(argc, argv);
......
...@@ -174,7 +174,7 @@ TEST(compareSparse, multiGradientMachine) { ...@@ -174,7 +174,7 @@ TEST(compareSparse, multiGradientMachine) {
FLAGS_local = local; FLAGS_local = local;
FLAGS_ports_num_for_sparse = 5; FLAGS_ports_num_for_sparse = 5;
for (bool useGpu : {false, true}) { for (bool useGpu : {false, true}) {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
if (useGpu) continue; if (useGpu) continue;
#endif #endif
FLAGS_parallel_nn = useGpu; FLAGS_parallel_nn = useGpu;
...@@ -198,7 +198,7 @@ TEST(compareSparse, NeuralNetwork) { ...@@ -198,7 +198,7 @@ TEST(compareSparse, NeuralNetwork) {
FLAGS_local = local; FLAGS_local = local;
FLAGS_ports_num_for_sparse = 5; FLAGS_ports_num_for_sparse = 5;
for (bool useGpu : {false, true}) { for (bool useGpu : {false, true}) {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
if (useGpu) continue; if (useGpu) continue;
#endif #endif
FLAGS_parallel_nn = useGpu; FLAGS_parallel_nn = useGpu;
......
...@@ -51,7 +51,7 @@ void checkGradientTest(const string& configFile, ...@@ -51,7 +51,7 @@ void checkGradientTest(const string& configFile,
TEST(checkGradient, cpu) { checkGradientTest(configFile1, false, false); } TEST(checkGradient, cpu) { checkGradientTest(configFile1, false, false); }
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
TEST(checkGradient, gpu) { checkGradientTest(configFile1, true, false); } TEST(checkGradient, gpu) { checkGradientTest(configFile1, true, false); }
TEST(checkGradient, multiGpu) { TEST(checkGradient, multiGpu) {
...@@ -97,7 +97,7 @@ TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); } ...@@ -97,7 +97,7 @@ TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); }
TEST(checkGradient, chunk) { TEST(checkGradient, chunk) {
checkGradientTest(configFile3, false, false); checkGradientTest(configFile3, false, false);
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
checkGradientTest(configFile3, true, true); checkGradientTest(configFile3, true, true);
#endif #endif
} }
......
...@@ -79,7 +79,7 @@ void trainerOnePassTest(const string& configFile, ...@@ -79,7 +79,7 @@ void trainerOnePassTest(const string& configFile,
// 1. test trainer (cpu, gpu). // 1. test trainer (cpu, gpu).
TEST(trainerOnePass, cpu) { trainerOnePassTest(configFile1, false, false); } TEST(trainerOnePass, cpu) { trainerOnePassTest(configFile1, false, false); }
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
TEST(trainerOnePass, gpu) { trainerOnePassTest(configFile1, true, false); } TEST(trainerOnePass, gpu) { trainerOnePassTest(configFile1, true, false); }
TEST(trainerOnePass, gpu2) { trainerOnePassTest(configFile1, true, false, 2); } TEST(trainerOnePass, gpu2) { trainerOnePassTest(configFile1, true, false, 2); }
...@@ -94,7 +94,7 @@ TEST(trainerOnePass, parallel) { ...@@ -94,7 +94,7 @@ TEST(trainerOnePass, parallel) {
#endif #endif
// 2. test average_window. // 2. test average_window.
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
TEST(average_window, gpu) { TEST(average_window, gpu) {
trainerOnePassTest(configFile1, true, false, 4, 0.01); trainerOnePassTest(configFile1, true, false, 4, 0.01);
} }
...@@ -266,7 +266,7 @@ TEST(checkRemoteUpdater, cpuTrainerOldUpdater) { ...@@ -266,7 +266,7 @@ TEST(checkRemoteUpdater, cpuTrainerOldUpdater) {
checkRemoteParameterUpdaterTest(configFile1, false, false, 1, true); checkRemoteParameterUpdaterTest(configFile1, false, false, 1, true);
} }
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
TEST(checkRemoteUpdater, gpuTrainer) { TEST(checkRemoteUpdater, gpuTrainer) {
checkRemoteParameterUpdaterTest(configFile1, true, false); checkRemoteParameterUpdaterTest(configFile1, true, false);
} }
......
...@@ -113,7 +113,7 @@ void testGeneration(const string& configFile, ...@@ -113,7 +113,7 @@ void testGeneration(const string& configFile,
#ifndef PADDLE_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
TEST(RecurrentGradientMachine, test_generation) { TEST(RecurrentGradientMachine, test_generation) {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
const auto useGpuConfs = {false}; const auto useGpuConfs = {false};
#else #else
const auto useGpuConfs = {true, false}; const auto useGpuConfs = {true, false};
......
...@@ -14,7 +14,7 @@ limitations under the License. */ ...@@ -14,7 +14,7 @@ limitations under the License. */
#include "Flags.h" #include "Flags.h"
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
DEFINE_bool(use_gpu, false, "Only support CPU training"); DEFINE_bool(use_gpu, false, "Only support CPU training");
#else #else
DEFINE_bool(use_gpu, true, "Whether to use GPU for training"); DEFINE_bool(use_gpu, true, "Whether to use GPU for training");
......
...@@ -218,7 +218,7 @@ protected: ...@@ -218,7 +218,7 @@ protected:
* *d2* is peer device to enable direct access to by the d1 device. * *d2* is peer device to enable direct access to by the d1 device.
*/ */
inline void enablePeerAccess(int d1, int d2) { inline void enablePeerAccess(int d1, int d2) {
#ifdef PADDLE_WITH_GPU #ifdef PADDLE_WITH_CUDA
if (hl_device_can_access_peer(d1, d2)) { if (hl_device_can_access_peer(d1, d2)) {
SetDevice dev(d1); SetDevice dev(d1);
hl_device_enable_peer_access(d2); hl_device_enable_peer_access(d2);
......
...@@ -48,7 +48,7 @@ void printVersion(std::ostream& os); ...@@ -48,7 +48,7 @@ void printVersion(std::ostream& os);
* @return return true if paddle compiled with GPU * @return return true if paddle compiled with GPU
*/ */
constexpr bool isWithGpu() { constexpr bool isWithGpu() {
#ifndef PADDLE_WITH_GPU #ifndef PADDLE_WITH_CUDA
return false; return false;
#else #else
return true; return true;
......
...@@ -10,7 +10,8 @@ There are: ...@@ -10,7 +10,8 @@ There are:
* EndPass * EndPass
""" """
__all__ = [ __all__ = [
'EndIteration', 'BeginIteration', 'BeginPass', 'EndPass', 'TestResult' 'EndIteration', 'BeginIteration', 'BeginPass', 'EndPass', 'TestResult',
'EndForwardBackward'
] ]
...@@ -73,6 +74,17 @@ class BeginIteration(object): ...@@ -73,6 +74,17 @@ class BeginIteration(object):
self.batch_id = batch_id self.batch_id = batch_id
class EndForwardBackward(object):
"""
Event On One Batch ForwardBackward Complete.
"""
def __init__(self, pass_id, batch_id, gm):
self.pass_id = pass_id
self.batch_id = batch_id
self.gm = gm
class EndIteration(WithMetric): class EndIteration(WithMetric):
""" """
Event On One Batch Training Complete. Event On One Batch Training Complete.
......
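A minimal sketch of how a user-side handler might consume the new event, assuming the usual paddle.v2 event-handler pattern (the handler body is illustrative). EndForwardBackward fires before the parameter update, while EndIteration now fires after finishBatch, as the trainer change further down shows:
import paddle.v2 as paddle
def event_handler(event):
    if isinstance(event, paddle.event.EndForwardBackward):
        # gradients are still on event.gm here; the update has not run yet
        pass
    elif isinstance(event, paddle.event.EndIteration):
        if event.batch_id % 100 == 0:
            print "Pass %d, Batch %d, Cost %f" % (
                event.pass_id, event.batch_id, event.cost)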
...@@ -48,6 +48,21 @@ class TestTanh(OpTest): ...@@ -48,6 +48,21 @@ class TestTanh(OpTest):
self.check_grad(['X'], 'Y', max_relative_error=0.007) self.check_grad(['X'], 'Y', max_relative_error=0.007)
class TestTanhShrink(OpTest):
def setUp(self):
self.op_type = "tanh_shrink"
self.inputs = {
'X': np.random.uniform(0.1, 1, [10, 17]).astype("float32")
}
self.outputs = {'Y': self.inputs['X'] - np.tanh(self.inputs['X'])}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.008)
class TestSqrt(OpTest): class TestSqrt(OpTest):
def setUp(self): def setUp(self):
self.op_type = "sqrt" self.op_type = "sqrt"
...@@ -122,6 +137,28 @@ class TestBRelu(OpTest): ...@@ -122,6 +137,28 @@ class TestBRelu(OpTest):
self.check_grad(['X'], 'Y', max_relative_error=0.02) self.check_grad(['X'], 'Y', max_relative_error=0.02)
class TestRelu6(OpTest):
def setUp(self):
self.op_type = "relu6"
x = np.random.uniform(-1, 1, [4, 10]).astype("float32")
threshold = 6.0
# The same with TestAbs
x[np.abs(x) < 0.005] = 0.02
x[np.abs(x - threshold) < 0.005] = threshold + 0.02
self.inputs = {'X': x}
self.attrs = {'threshold': threshold}
self.outputs = {
'Y': np.minimum(np.maximum(self.inputs['X'], 0), threshold)
}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.02)
class TestSoftRelu(OpTest): class TestSoftRelu(OpTest):
def setUp(self): def setUp(self):
self.op_type = "soft_relu" self.op_type = "soft_relu"
......
import unittest
import numpy as np
from op_test import OpTest
class TestAdadeltaOp1(OpTest):
def setUp(self):
self.op_type = "adadelta"
param = np.random.uniform(-1, 1, (102, 105)).astype("float32")
grad = np.random.uniform(-1, 1, (102, 105)).astype("float32")
# The squared gradient is positive
avg_squared_grad = np.random.random((102, 105)).astype("float32")
# The squared update is positive
avg_squared_update = np.random.random((102, 105)).astype("float32")
rho = 0.95
epsilon = 1e-6
self.inputs = {
'Param': param,
'Grad': grad,
'AvgSquaredGrad': avg_squared_grad,
'AvgSquaredUpdate': avg_squared_update
}
self.attrs = {'rho': rho, 'epsilon': epsilon}
avg_squared_grad_out = rho * avg_squared_grad + \
(1 - rho) * np.square(grad)
update = -np.multiply(
np.sqrt(
np.divide(avg_squared_update + epsilon, avg_squared_grad_out +
epsilon)), grad)
avg_squared_update_out = rho * avg_squared_update + \
(1 - rho) * np.square(update)
param_out = param + update
self.outputs = {
'ParamOut': param_out,
'AvgSquaredGradOut': avg_squared_grad_out,
'AvgSquaredUpdateOut': avg_squared_update_out
}
def test_check_output(self):
self.check_output()
class TestAdadeltaOp2(OpTest):
'''Test Adadelta op with default attribute values
'''
def setUp(self):
self.op_type = "adadelta"
param = np.random.uniform(-1, 1, (102, 105)).astype("float32")
grad = np.random.uniform(-1, 1, (102, 105)).astype("float32")
# The squared gradient is positive
avg_squared_grad = np.random.random((102, 105)).astype("float32")
# The squared update is positive
avg_squared_update = np.random.random((102, 105)).astype("float32")
rho = 0.95
epsilon = 1e-6
self.inputs = {
'Param': param,
'Grad': grad,
'AvgSquaredGrad': avg_squared_grad,
'AvgSquaredUpdate': avg_squared_update
}
avg_squared_grad_out = rho * avg_squared_grad + \
(1 - rho) * np.square(grad)
update = -np.multiply(
np.sqrt(
np.divide(avg_squared_update + epsilon, avg_squared_grad_out +
epsilon)), grad)
avg_squared_update_out = rho * avg_squared_update + \
(1 - rho) * np.square(update)
param_out = param + update
self.outputs = {
'ParamOut': param_out,
'AvgSquaredGradOut': avg_squared_grad_out,
'AvgSquaredUpdateOut': avg_squared_update_out
}
def test_check_output(self):
self.check_output()
if __name__ == "__main__":
unittest.main()
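TestAdadeltaOp1 and TestAdadeltaOp2 above duplicate the update math; as a sketch it could be factored into a helper like the adamax_step function later in this diff (adadelta_step is a hypothetical name, not part of the change):
import numpy as np
def adadelta_step(param, grad, avg_squared_grad, avg_squared_update,
                  rho=0.95, epsilon=1e-6):
    '''One Adadelta update; a sketch of the math both tests above verify.'''
    avg_squared_grad_out = rho * avg_squared_grad + (1 - rho) * np.square(grad)
    update = -np.sqrt((avg_squared_update + epsilon) /
                      (avg_squared_grad_out + epsilon)) * grad
    avg_squared_update_out = rho * avg_squared_update + \
        (1 - rho) * np.square(update)
    return param + update, avg_squared_grad_out, avg_squared_update_out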
import unittest
import numpy as np
from op_test import OpTest
class TestAdagradOp1(OpTest):
''' Test Adagrad operator with explicit attributes
'''
def setUp(self):
self.op_type = "adagrad"
param = np.random.random((123, 321)).astype("float32")
grad = np.random.random((123, 321)).astype("float32")
moment = np.zeros((123, 321)).astype("float32")
lr = 0.01
epsilon = 1e-8
self.inputs = {
'Param': param,
'Grad': grad,
'Moment': moment,
'LearningRate': np.array([lr]).astype("float32")
}
self.attrs = {'epsilon': epsilon}
moment_out = moment + grad * grad
param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}
def test_check_output(self):
self.check_output()
class TestAdagradOp2(OpTest):
''' Test Adagrad operator with default attributes
'''
def setUp(self):
self.op_type = "adagrad"
param = np.random.random((123, 321)).astype("float32")
grad = np.random.random((123, 321)).astype("float32")
moment = np.zeros((123, 321)).astype("float32")
lr = 0.01
epsilon = 1e-6
self.inputs = {
'Param': param,
'Grad': grad,
'Moment': moment,
'LearningRate': np.array([lr]).astype("float32")
}
self.attrs = {'epsilon': epsilon}
moment_out = moment + grad * grad
param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}
def test_check_output(self):
self.check_output()
if __name__ == "__main__":
unittest.main()
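The Adagrad rule both tests above check, written once as an illustrative helper (hypothetical name, not part of the change):
import numpy as np
def adagrad_step(param, grad, moment, lr, epsilon):
    '''One Adagrad update: accumulate squared gradients, then scale the step.'''
    moment_out = moment + grad * grad
    param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
    return param_out, moment_out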
import unittest
import numpy as np
from op_test import OpTest
class TestAdamaxOp1(OpTest):
def setUp(self):
'''Test Adamax Operator with supplied attributes
'''
self.op_type = "adamax"
param = np.random.uniform(-1, 1, (102, 105)).astype("float32")
grad = np.random.uniform(-1, 1, (102, 105)).astype("float32")
moment = np.random.uniform(-1, 1, (102, 105)).astype("float32")
# The infinity norm is positive
inf_norm = np.random.random((102, 105)).astype("float32")
learning_rate = 0.002
beta1 = 0.78
beta2 = 0.899
epsilon = 1e-5
beta1_pow = beta1**10
self.inputs = {
'Param': param,
'Grad': grad,
'Moment': moment,
'InfNorm': inf_norm,
'LearningRate': np.array([learning_rate]).astype("float32"),
'Beta1Pow': np.array([beta1_pow]).astype("float32")
}
self.attrs = {'beta1': beta1, 'beta2': beta2, 'epsilon': epsilon}
param_out, moment_out, inf_norm_out, beta1_pow_out = adamax_step(
self.inputs, self.attrs)
self.outputs = {
'ParamOut': param_out,
'MomentOut': moment_out,
'InfNormOut': inf_norm_out,
'Beta1PowOut': beta1_pow_out
}
def test_check_output(self):
self.check_output()
class TestAdamaxOp2(OpTest):
'''Test Adamax Operator with default attributes
'''
def setUp(self):
self.op_type = "adamax"
param = np.random.uniform(-1, 1, (102, 105)).astype("float32")
grad = np.random.uniform(-1, 1, (102, 105)).astype("float32")
moment = np.random.uniform(-1, 1, (102, 105)).astype("float32")
# The infinity norm is positive
inf_norm = np.random.random((102, 105)).astype("float32")
learning_rate = 0.002
beta1 = 0.9
beta2 = 0.999
epsilon = 1e-8
beta1_pow = beta1**8
self.inputs = {
'Param': param,
'Grad': grad,
'Moment': moment,
'InfNorm': inf_norm,
'LearningRate': np.array([learning_rate]).astype("float32"),
'Beta1Pow': np.array([beta1_pow]).astype("float32")
}
attrs = {'beta1': beta1, 'beta2': beta2, 'epsilon': epsilon}
param_out, moment_out, inf_norm_out, beta1_pow_out = adamax_step(
self.inputs, attrs)
self.outputs = {
'ParamOut': param_out,
'MomentOut': moment_out,
'InfNormOut': inf_norm_out,
'Beta1PowOut': beta1_pow_out
}
def test_check_output(self):
self.check_output()
class TestAdamaxOpMultipleSteps(OpTest):
def setUp(self):
'''Test Adamax Operator over multiple update steps
'''
self.op_type = "adamax"
self.num_steps = 10
param = np.random.uniform(-1, 1, (102, 105)).astype("float32")
grad = np.random.uniform(-1, 1, (102, 105)).astype("float32")
moment = np.random.uniform(-1, 1, (102, 105)).astype("float32")
# The infinity norm is positive
inf_norm = np.random.random((102, 105)).astype("float32")
learning_rate = 0.002
beta1 = 0.8
beta2 = 0.99
epsilon = 1e-5
beta1_pow = 1
self.inputs = {
'Param': param,
'Grad': grad,
'Moment': moment,
'InfNorm': inf_norm,
'LearningRate': np.array([learning_rate]).astype("float32"),
'Beta1Pow': np.array([beta1_pow]).astype("float32")
}
self.attrs = {'beta1': beta1, 'beta2': beta2, 'epsilon': epsilon}
param_out, moment_out, inf_norm_out, beta1_pow_out = adamax_step(
self.inputs, self.attrs)
def test_check_output(self):
for _ in range(self.num_steps):
param_out, moment_out, inf_norm_out, beta1_pow_out = adamax_step(
self.inputs, self.attrs)
self.outputs = {
'ParamOut': param_out,
'MomentOut': moment_out,
'InfNormOut': inf_norm_out,
'Beta1PowOut': beta1_pow_out
}
# Verify output for this step
self.check_output()
# Output of this step becomes input for next step
self.inputs['Param'] = param_out
self.inputs['Moment'] = moment_out
self.inputs['InfNorm'] = inf_norm_out
self.inputs['Beta1Pow'] = beta1_pow_out
# Randomize gradient for next step
self.inputs['Grad'] = np.random.uniform(
-1, 1, (102, 105)).astype("float32")
def adamax_step(inputs, attributes):
'''
Simulate one step of the adamax optimizer
:param inputs: dict of inputs
:param attributes: dict of attributes
:return tuple: tuple of output param, moment, inf_norm and
beta1 power accumulator
'''
param = inputs['Param']
grad = inputs['Grad']
moment = inputs['Moment']
inf_norm = inputs['InfNorm']
lr = inputs['LearningRate']
beta1_pow = inputs['Beta1Pow']
beta1 = attributes['beta1']
beta2 = attributes['beta2']
epsilon = attributes['epsilon']
moment_out = beta1 * moment + (1 - beta1) * grad
inf_norm_out = np.maximum(beta2 * inf_norm + epsilon, np.abs(grad))
beta1_pow_out = beta1_pow * beta1
lr_t = (lr / (1 - beta1_pow_out))
param_out = param - lr_t * np.divide(moment_out, inf_norm_out)
return param_out, moment_out, inf_norm_out, beta1_pow_out
if __name__ == "__main__":
unittest.main()
import unittest
import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator
class TestInferShape(unittest.TestCase):
def test_sum_op(self):
prog = core.ProgramDesc.__create_program_desc__()
self.assertIsNotNone(prog)
block = prog.block(0)
self.assertIsNotNone(block)
shape = [10, 20]
# prepare input/output
x1 = block.new_var("x1")
x1.set_shape(shape)
x2 = block.new_var("x2")
x2.set_shape(shape)
out = block.new_var("out")
# prepare the operator
sum_op_desc = block.append_op()
sum_op_desc.set_type("sum")
sum_op_desc.set_input("X", ["x1", "x2"])
sum_op_desc.set_output("Out", ["out"])
core.Operator.infer_shape(sum_op_desc, block)
self.assertEqual(out.shape(), shape)
def test_mul_op(self):
prog = core.ProgramDesc.__create_program_desc__()
self.assertIsNotNone(prog)
block = prog.block(0)
self.assertIsNotNone(block)
x_shape = [10, 20]
y_shape = [20, 30]
# prepare input/output
x1 = block.new_var("x")
x1.set_shape(x_shape)
x2 = block.new_var("y")
x2.set_shape(y_shape)
out = block.new_var("out")
# prepare the operator
mul_op_desc = block.append_op()
mul_op_desc.set_type("mul")
mul_op_desc.set_input("X", ["x"])
mul_op_desc.set_input("Y", ["y"])
mul_op_desc.set_output("Out", ["out"])
mul_op_desc.set_attr("x_num_col_dims", 1)
mul_op_desc.set_attr("y_num_col_dims", 1)
core.Operator.infer_shape(mul_op_desc, block)
self.assertEqual(out.shape(), [x_shape[0], y_shape[1]])
if __name__ == '__main__':
unittest.main()
import unittest
import numpy as np
from op_test import OpTest
class TestRmspropOp1(OpTest):
''' Test RMSProp with explicit inputs
'''
def setUp(self):
self.op_type = "rmsprop"
param = np.random.random((123, 321)).astype("float32")
mean_square = np.random.random((123, 321)).astype("float32")
learning_rate = np.array([0.01]).astype("float32")
grad = np.random.random((123, 321)).astype("float32")
moment = np.zeros((123, 321)).astype("float32")
epsilon = 1e-6
decay = 0.9
momentum = 0.0
self.inputs = {
'Param': param,
'MeanSquare': mean_square,
'LearningRate': learning_rate,
'Grad': grad,
'Moment': moment,
}
self.attrs = {'epsilon': epsilon, 'decay': decay, 'momentum': momentum}
ms_out = decay * mean_square + (1 - decay) * grad * grad
moment_out = momentum * moment + \
learning_rate * grad / np.sqrt(ms_out + epsilon)
param_out = param - moment_out
self.outputs = {
'ParamOut': param_out,
'MomentOut': moment_out,
'MeanSquareOut': ms_out
}
def test_check_output(self):
self.check_output()
class TestRmspropOp2(OpTest):
'''Test RMSProp with default values for attributes
'''
def setUp(self):
self.op_type = "rmsprop"
param = np.random.random((123, 321)).astype("float32")
mean_square = np.random.random((123, 321)).astype("float32")
learning_rate = np.array([0.01]).astype("float32")
grad = np.random.random((123, 321)).astype("float32")
moment = np.zeros((123, 321)).astype("float32")
epsilon = 1.0e-10
decay = 0.9
momentum = 0.0
self.inputs = {
'Param': param,
'MeanSquare': mean_square,
'LearningRate': learning_rate,
'Grad': grad,
'Moment': moment,
}
ms_out = decay * mean_square + (1 - decay) * grad * grad
moment_out = momentum * moment + \
learning_rate * grad / np.sqrt(ms_out + epsilon)
param_out = param - moment_out
self.outputs = {
'ParamOut': param_out,
'MomentOut': moment_out,
'MeanSquareOut': ms_out
}
def test_check_output(self):
self.check_output()
if __name__ == "__main__":
unittest.main()
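Likewise, the RMSProp math duplicated in the two tests above, as one illustrative helper (hypothetical name, not part of the change):
import numpy as np
def rmsprop_step(param, grad, mean_square, moment, lr,
                 epsilon, decay, momentum):
    '''One RMSProp update; a sketch of the math shared by both tests above.'''
    ms_out = decay * mean_square + (1 - decay) * grad * grad
    moment_out = momentum * moment + lr * grad / np.sqrt(ms_out + epsilon)
    param_out = param - moment_out
    return param_out, moment_out, ms_out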
import logging
import paddle.v2.framework.core as core
import unittest
import numpy as np
class TestTensorArray(unittest.TestCase):
def setUp(self):
self.ta = core.TensorArray()
self.batch_size = 10
self.dim = 2
# create a LoDTensor
self.scope = core.Scope()
var = self.scope.new_var("test_tensor")
self.place = core.CPUPlace()
tensor = var.get_tensor()
tensor.set_dims([self.batch_size, self.dim])
tensor.alloc_float(self.place)
tensor_array = np.array(tensor)
tensor_array[0, 0] = 0
tensor_array[1, 0] = 1
tensor_array[2, 0] = 2
tensor_array[3, 0] = 3
tensor_array[4, 0] = 4
tensor_array[5, 0] = 5
tensor_array[6, 0] = 6
tensor_array[7, 0] = 7
tensor_array[8, 0] = 8
tensor_array[9, 0] = 9
lod_py = [[0, 2, 5, 10]]
lod_tensor = core.LoDTensor(lod_py)
lod_tensor.set(tensor_array, self.place)
self.py_seq_meta = [[5, 10, 2], [2, 5, 1], [0, 2, 0]]
self.tensor = lod_tensor
def test_unstack(self):
self.ta.unstack(self.tensor)
self.assertEqual(self.tensor.get_dims()[0], self.ta.size())
def test_read(self):
self.ta.unstack(self.tensor)
for i in range(self.batch_size):
tensor = self.ta.read(i)
def test_write(self):
self.ta.unstack(self.tensor)
# create a tensor with shape of [1, self.dim]
var = self.scope.new_var("hell")
tensor = var.get_tensor()
tensor.set_dims([1, self.dim])
tensor.alloc_float(self.place)
tensor_array = np.array(tensor)
for i in range(self.dim):
tensor_array[0, i] = i
tensor.set(tensor_array, self.place)
self.ta.write(2, tensor)
ta_tensor = self.ta.read(2)
ta_tensor_array = np.array(ta_tensor)
self.assertEqual(ta_tensor.get_dims(), [1, self.dim])
self.assertTrue((tensor_array == ta_tensor_array).all())
def test_write_shared(self):
self.ta.unstack(self.tensor)
# create a tensor with shape of [1, self.dim]
var = self.scope.new_var("hell")
tensor = var.get_tensor()
tensor.set_dims([1, self.dim])
tensor.alloc_float(self.place)
tensor_array = np.array(tensor)
for i in range(self.dim):
tensor_array[0, i] = i
tensor.set(tensor_array, self.place)
self.ta.write_shared(2, tensor)
ta_tensor = self.ta.read(2)
ta_tensor_array = np.array(ta_tensor)
self.assertEqual(ta_tensor.get_dims(), [1, self.dim])
self.assertTrue((tensor_array == ta_tensor_array).all())
def test_unpack(self):
meta = self.ta.unpack(self.tensor, 0, True)
self.assertEqual(self.ta.size(), 5)
self.assertEqual(meta, self.py_seq_meta)
def test_pack(self):
meta = self.ta.unpack(self.tensor, 0, True)
print "meta", meta
tensor = self.ta.pack(0, meta, self.tensor.lod())
print np.array(self.tensor)
print np.array(tensor)
self.assertTrue((np.array(self.tensor) == np.array(tensor)).all())
self.assertTrue(tensor.lod(), self.tensor.lod())
if __name__ == '__main__':
unittest.main()
...@@ -164,11 +164,18 @@ class SGD(object): ...@@ -164,11 +164,18 @@ class SGD(object):
pass_type) pass_type)
self.__gradient_machine__.eval(pass_evaluator) self.__gradient_machine__.eval(pass_evaluator)
self.__gradient_machine__.eval(batch_evaluator) self.__gradient_machine__.eval(batch_evaluator)
event_handler(
v2_event.EndForwardBackward(
pass_id=pass_id,
batch_id=batch_id,
gm=self.__gradient_machine__))
for each_param in self.__gradient_machine__.getNonStaticParameters( for each_param in self.__gradient_machine__.getNonStaticParameters(
): ):
self.__parameter_updater__.update(each_param) self.__parameter_updater__.update(each_param)
cost_sum = out_args.sum() cost_sum = out_args.sum()
cost = cost_sum / len(data_batch) cost = cost_sum / len(data_batch)
self.__parameter_updater__.finishBatch(cost)
batch_evaluator.finish()
event_handler( event_handler(
v2_event.EndIteration( v2_event.EndIteration(
pass_id=pass_id, pass_id=pass_id,
...@@ -176,8 +183,6 @@ class SGD(object): ...@@ -176,8 +183,6 @@ class SGD(object):
cost=cost, cost=cost,
evaluator=batch_evaluator, evaluator=batch_evaluator,
gm=self.__gradient_machine__)) gm=self.__gradient_machine__))
self.__parameter_updater__.finishBatch(cost)
batch_evaluator.finish()
self.__parameter_updater__.finishPass() self.__parameter_updater__.finishPass()
pass_evaluator.finish() pass_evaluator.finish()
......