Commit dcf5e948 authored by chengduoZH

remove conflict

...@@ -61,32 +61,32 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl
## Installation
It is recommended to check out the
[Docker installation guide](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/build_and_install/docker_install_en.html)
before looking into the
[build from source guide](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/build_and_install/build_from_source_en.html).
## Documentation
We provide [English](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/index_en.html) and
[Chinese](http://www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/index_cn.html) documentation.
- [Deep Learning 101](http://www.paddlepaddle.org/docs/develop/book/01.fit_a_line/index.html)
You might want to start from this online interactive book that can run in a Jupyter Notebook.
- [Distributed Training](http://www.paddlepaddle.org/docs/develop/documentation/en/howto/usage/cluster/cluster_train_en.html)
You can run distributed training jobs on MPI clusters.
- [Distributed Training on Kubernetes](http://www.paddlepaddle.org/docs/develop/documentation/en/howto/usage/cluster/k8s_en.html)
You can also run distributed training jobs on Kubernetes clusters.
- [Python API](http://www.paddlepaddle.org/docs/develop/documentation/en/api/index_en.html)
Our new API enables much shorter programs.
- [How to Contribute](http://www.paddlepaddle.org/docs/develop/documentation/en/howto/dev/contribute_to_paddle_en.html)
We appreciate your contributions!
......
...@@ -28,6 +28,10 @@ function train() {
--test_period=100 \
--config_args=$args \
2>&1 | tee ${log}
avg_time=`tail ${log} -n 1 | awk -F ' ' '{print $8}' | sed 's/avg=//'`
fps=`awk 'BEGIN{printf "%.2f",('$bs' / '$avg_time' * 1000)}'`
echo "FPS: $fps images/sec" 2>&1 | tee -a ${log}
}
if [ ! -f "train.list" ]; then
......
set -e
function clock_to_seconds() {
hours=`echo $1 | awk -F ':' '{print $1}'`
mins=`echo $1 | awk -F ':' '{print $2}'`
secs=`echo $1 | awk -F ':' '{print $3}'`
echo `awk 'BEGIN{printf "%.2f",('$secs' + '$mins' * 60 + '$hours' * 3600)}'`
}
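# e.g. `clock_to_seconds 01:02:03.5` prints 3723.50 (3600 + 120 + 3.5 seconds)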
function infer() {
unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY
topology=$1
layer_num=$2
bs=$3
thread=`nproc`
if [ $thread -gt $bs ]; then
thread=$bs
fi
log="logs/infer-${topology}-${layer_num}-${thread}openblas-${bs}.log"
models_in="models/${topology}-${layer_num}/pass-00000/"
if [ ! -d $models_in ]; then
echo "./run_mkl_infer.sh to save the model first"
exit 0
fi
log_period=$((256 / bs))
paddle train --job=test \
--config="${topology}.py" \
--use_gpu=False \
--trainer_count=$thread \
--log_period=$log_period \
--config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True" \
--init_model_path=$models_in \
2>&1 | tee ${log}
# calculate the time of the last 5 log periods (5 * 256 = 1280 samples);
# the earlier iterations are treated as warm-up time.
start=`tail ${log} -n 7 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
end=`tail ${log} -n 2 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
start_sec=`clock_to_seconds $start`
end_sec=`clock_to_seconds $end`
fps=`awk 'BEGIN{printf "%.2f",(1280 / ('$end_sec' - '$start_sec'))}'`
echo "Last 1280 samples start: ${start}(${start_sec} sec), end: ${end}(${end_sec} sec;" >> ${log}
echo "FPS: $fps images/sec" 2>&1 | tee -a ${log}
}
if [ ! -f "train.list" ]; then
echo " " > train.list
fi
if [ ! -f "test.list" ]; then
echo " " > test.list
fi
if [ ! -d "logs" ]; then
mkdir logs
fi
# inference benchmark
for batchsize in 1 2 4 8 16; do
infer googlenet v1 $batchsize
infer resnet 50 $batchsize
infer vgg 19 $batchsize
done
set -e
function train() {
unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY
topology=$1
layer_num=$2
bs=$3
thread=`nproc`
# each trainer uses only 1 core to avoid conflicts
log="logs/train-${topology}-${layer_num}-${thread}openblas-${bs}.log"
args="batch_size=${bs},layer_num=${layer_num}"
config="${topology}.py"
paddle train --job=time \
--config=$config \
--use_gpu=False \
--trainer_count=$thread \
--log_period=10 \
--test_period=100 \
--config_args=$args \
2>&1 | tee ${log}
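# The last log line reports the average per-batch time in milliseconds
# (its "avg=" field), so FPS = batch_size / avg_time * 1000.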
avg_time=`tail ${log} -n 1 | awk -F ' ' '{print $8}' | sed 's/avg=//'`
fps=`awk 'BEGIN{printf "%.2f",('$bs' / '$avg_time' * 1000)}'`
echo "FPS: $fps images/sec" 2>&1 | tee -a ${log}
}
if [ ! -f "train.list" ]; then
echo " " > train.list
fi
if [ ! -d "logs" ]; then
mkdir logs
fi
# training benchmark
for batchsize in 64 128 256; do
train vgg 19 $batchsize
train resnet 50 $batchsize
train googlenet v1 $batchsize
done
...@@ -7,3 +7,4 @@ API
Model Configuration <v2/model_configs.rst>
Data Access <v2/data.rst>
Training and Inference <v2/run_logic.rst>
v2/fluid.rst
...@@ -188,12 +188,6 @@ beam_search_decode
:noindex:
lstm
---------
.. autofunction:: paddle.v2.fluid.layers.lstm
:noindex:
lod_rank_table
--------------
.. autofunction:: paddle.v2.fluid.layers.lod_rank_table
...@@ -300,3 +294,27 @@ conv2d_transpose
.. autofunction:: paddle.v2.fluid.layers.conv2d_transpose
:noindex:
sequence_expand
---------------
.. autofunction:: paddle.v2.fluid.layers.sequence_expand
:noindex:
lstm_unit
---------
.. autofunction:: paddle.v2.fluid.layers.lstm_unit
:noindex:
sequence_softmax
----------------
.. autofunction:: paddle.v2.fluid.layers.sequence_softmax
:noindex:
reduce_sum
----------
.. autofunction:: paddle.v2.fluid.layers.reduce_sum
:noindex:
# Executor Design Doc
## Motivation
In [fluid](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/fluid.md), we encourage the user to use deep learning programming paradigms to describe the training process. When the user-written Python program is executed, it will first create a protobuf message
[`ProgramDesc`](https://github.com/PaddlePaddle/Paddle/blob/a91efdde6910ce92a78e3aa7157412c4c88d9ee8/paddle/framework/framework.proto#L145) that describes the process and is conceptually like an [abstract syntax tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree).
We use the executor to do the runtime evaluation of a `ProgramDesc`. The executor runs the `ProgramDesc` like an interpreter: the `ProgramDesc` contains the intrinsics (operators, in this case) and the variables that will be used, and the executor explicitly executes this stored, precompiled code.
## Overview
An executor takes a `ProgramDesc`, a `block_id` and a `Scope`. The `ProgramDesc` is a list of blocks, and each block contains the protobuf definition of all the parameters and operators in the block. The `block_id` specifies the entrance block. The `Scope` is the container of all the variable instances, and it is persistent throughout different runs.
## Executor
The `Executor` explicitly executes all the intrinsics (operators here) in the `block_id`th block of a `ProgramDesc`. Essentially, it instantiates Variables and Operators, then runs all the operators in sequence, one by one.
It is very similar to how a stack frame is pushed when entering a block; the executor then cleans up all the temporary variables when a mini-batch is finished. It does not, however, have the stack-frame pop process.
### The interface
```c++
Executor(places);
```
An executor does not own any computing resources; a user can only construct an executor using the specified places.
### Running an Executor
```
void Run(ProgramDesc, Scope, block_id, create_local_scope);
```
An `Executor` only provides a unified way to execute a `ProgramDesc`. The `ProgramDesc` is the target that will be executed, the `Scope` specifies the variable container, the `block_id` indicates the entrance block, and `create_local_scope` is a boolean that states whether the temporary variables will be destroyed after the execution finishes. [[code]](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.cc)
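To make the interpreter analogy concrete, here is a toy, self-contained C++ sketch (the scope, op and program types below are made up for illustration and are not the real Paddle classes):

```cpp
// Toy illustration of the executor-as-interpreter idea: a "block" is a list
// of ops; Run() executes them one-by-one against a variable scope.
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <vector>

using Scope = std::map<std::string, float>;  // variable container
using Op = std::function<void(Scope&)>;      // an "operator"

struct ToyBlock { std::vector<Op> ops; };
struct ToyProgramDesc { std::vector<ToyBlock> blocks; };

void Run(const ToyProgramDesc& prog, Scope& scope, int block_id) {
  for (const Op& op : prog.blocks[block_id].ops) op(scope);  // in sequence
}

int main() {
  ToyBlock block;
  block.ops = {
      [](Scope& s) { s["x"] = 3.0f; },                  // "feed" op
      [](Scope& s) { s["y"] = 2.0f * s["x"] + 1.0f; },  // "mul/add" op
      [](Scope& s) { std::cout << s["y"] << "\n"; },    // "fetch" op
  };
  ToyProgramDesc prog;
  prog.blocks.push_back(block);
  Scope scope;
  Run(prog, scope, /*block_id=*/0);  // prints 7
  return 0;
}
```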
## Problem
In PaddlePaddle's [Design](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/switch_kernel.md), one Operator may have multiple kernels. Users may have a personal preference for a certain type of kernel for an operator, such as `force_cpu` to choose a CPU kernel or `use_cudnn` to choose a cuDNN kernel, so we need to provide a way for users to do this.
In the current design, we use KernelType to describe one kernel.
```cpp
struct KernelType {
Place place_;
DataType data_type_;
LayoutType layout_;
};
```
`place_`, `data_type_` and `layout_` can be obtained from the input tensors of the operator: `GetActualKernelType(inputs)` uses the inputs to infer the proper kernel key that fits the incoming data, but users cannot configure it directly.
The [design](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/switch_kernel.md) also provides a virtual method `GetExpectedKernelType` that users can override to choose the `KernelType` they want to use.
So we should pass the information the user defines in the proto to `GetExpectedKernelType` for choosing a kernel.
The problem is: how should we define and send the information for `GetExpectedKernelType` to use?
## Solution
### Potential choice
1. Do nothing: let users add the information they want to the operator's attributes and read it inside `GetExpectedKernelType`. This can work properly, but there is a little problem that users may define many kinds of hints for the same purpose, such as `force_cpu`, `use_cpu` and `cpu_kernel` to choose a CPU kernel, and `use_cudnn`, `force_cudnn` and `cudnn_kernel` to choose a cuDNN kernel.
2. Pre-define all the needed options and use a single attribute key such as `kernel_hint` for the user. This is not so flexible if the user wants to define some additional kind of hint.
### Final choice
To provide enough flexibility while avoiding confusing definitions, we can define some global constants for these attribute names, such as `force_cpu`, `use_cudnn` and `use_mkldnn`, for users to choose from.
In C++:
```cpp
const std::string kForceCPU = "force_cpu";
const std::string kUseCUDNN = "use_cudnn";
const std::string kUseMKLDNN = "use_mkldnn";
KernelType GetExpectedKernelType() {
if (Attr<bool>(kForceCPU)) {
return KernelType(CPUPlace, ...)
} else {
...
}
}
```
In Python:
```python
FORCE_CPU = core.kForceCPU()
def xx_layer(..., force_cpu=False):
  layer_helper = LayerHelper(...)
  layer_helper.append_op(
      type="xx",
      attr={FORCE_CPU: force_cpu})
```
# Intel® MKL Packed on PaddlePaddle: Design Doc
## Contents
- [Overview](#overview)
- [Key Points](#key-points)
- [Background](#background)
- [Solution](#solution)
- [Actions](#actions)
- [CMake](#cmake)
- [Layers](#layers)
- [Unit Tests](#unit-tests)
- [Python API](#python-api)
- [Benchmarking](#benchmarking)
## Overview
We plan to integrate the GEMM Packed APIs\[[1](#references)\] introduced in Intel® MKL into PaddlePaddle, to take full advantage of the Intel platform and effectively improve PaddlePaddle's performance on Intel architectures.
The current optimization mainly targets the Recurrent Neural Network (RNN) related layers (including `RecurrentLayer`, `GatedRecurrentLayer` and `LstmLayer`) and the PaddlePaddle V1 API.
## Key Points
### Background
PaddlePaddle currently uses the [cblas_?gemm](https://software.intel.com/en-us/mkl-developer-reference-c-cblas-gemm) function from the Intel® MKL library, which itself converts the original data into an internal format better suited to the Intel platform before computing.
1. Conversion overhead \
This data-format conversion (packing) appears relatively time-consuming when the computation itself is small. For example, in the vanilla RNN part of DeepSpeech2 \[[2](#references)\], the matrix size is `batch_size * 2048`.
2. Redundant conversion \
In some existing cases (e.g., RNN), multiple calls to cblas_?gemm use the same original data, so re-packing the original data on every call becomes redundant.
To minimize the packing cost across repeated cblas_?gemm calls, Intel® MKL introduced the following four APIs:
* [cblas_?gemm_alloc](https://software.intel.com/en-us/mkl-developer-reference-c-cblas-gemm-alloc)
* [cblas_?gemm_pack](https://software.intel.com/en-us/mkl-developer-reference-c-cblas-gemm-pack)
* [cblas_?gemm_compute](https://software.intel.com/en-us/mkl-developer-reference-c-cblas-gemm-compute)
* [cblas_?gemm_free](https://software.intel.com/en-us/mkl-developer-reference-c-cblas-gemm-free)
With these APIs, we can pack the original data once, and then pass the already-packed data to the gemm_compute calls that reuse the same data, thus avoiding redundant packing.
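As an illustration of the pack-once, compute-many pattern, here is a hedged C++ sketch; the four cblas_?gemm_* calls follow the Intel MKL reference linked above, while the surrounding `rnn_like_gemm` helper and its arguments are assumptions made for this example, not Paddle code:

```cpp
// Sketch: pack the shared weight matrix A once, then reuse it for every
// time step's GEMM (single precision). Consult the MKL reference for the
// authoritative signatures of the cblas_sgemm_* packed APIs.
#include <mkl.h>

void rnn_like_gemm(const float* weight, const float* const* inputs,
                   float** outputs, int steps, int m, int n, int k) {
  // Allocate and pack A (m x k) a single time.
  float* packed_a = cblas_sgemm_alloc(CblasAMatrix, m, n, k);
  cblas_sgemm_pack(CblasRowMajor, CblasAMatrix, CblasNoTrans, m, n, k,
                   /*alpha=*/1.0f, weight, /*lda=*/k, packed_a);

  // Reuse the packed A for every time step: C_t = packed(A) * B_t.
  for (int t = 0; t < steps; ++t) {
    cblas_sgemm_compute(CblasRowMajor, CblasPacked, CblasNoTrans, m, n, k,
                        packed_a, /*lda=*/k, inputs[t], /*ldb=*/n,
                        /*beta=*/0.0f, outputs[t], /*ldc=*/n);
  }
  cblas_sgemm_free(packed_a);
}
```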
### Solution
In the RNN case, all time steps within one forward/backward pass share the same weight. When doing inference only, successive forward passes also use the same weight, so there is no need to repeatedly pack the weight for every time step's computation in every forward pass.
Using the newly introduced GEMM Packed APIs, we pack the weight once at layer initialization, reuse the already-converted weight during the forward and backward passes, and re-pack the new weight after each weight update for the next iteration.
* Before the optimization, for a model with sequence length `T`, the number of conversions over `N` iterations is:
  - `inference`: `N * T`
  - `training`: `2 * N * T`
* After the optimization, for the same model, the number of conversions is reduced to:
  - `inference`: `1`
  - `training`: `2 * N`
## Actions
The related files to be added and the directory structure are as follows:
```txt
PaddlePaddle/Paddle
├── ...
└── paddle/
├── ...
└── gserver/
├── ...
├── layers/
│ ├── ...
│ ├── MKLPackedRecurrentLayer.*
│ ├── MKLPackedGatedRecurrentLayer.*
│ ├── MKLPackedLstmLayer.*
│ └── MKLPackedGemm.h
└── tests/
├── ...
└── test_MKLPacked.cpp
```
### CMake
In the corresponding `CMakeLists.txt`, whether the MKL Packed related features are enabled is determined by whether `WITH_MKL` is turned on.
### Layers
All `MKLPacked*Layer`s inherit from PaddlePaddle's base class `Layer`, and the header file `MKLPackedGemm.h` is added, which wraps the related GEMM Packed APIs.
### Unit Tests
We will add `test_MKLPacked.cpp` to test the layers optimized with MKL Packed.
For each newly added RNN layer, we will compare the following two aspects:
1. For the optimized layer itself, compare the results of sequence mode (`rnn_use_batch=false`) and batch mode (`rnn_use_batch=true`).
2. Compare the results of the optimized layer and the corresponding original PaddlePaddle layer in batch mode.
### Python API
We plan to add a `use_mkl_packed` flag in `paddle/utils.Flags` to select whether to use these features; when compiled with `WITH_MKL=ON`, it is set to `true` by default.
Meanwhile, the `use_mkl_packed` option is added at the corresponding layers in `python/paddle/trainer/config_parser.py`, so that users can choose whether to enable this feature on the Python side.
A possible implementation:
```python
use_mkl_packed = bool(int(g_command_config_args.get("use_mkl_packed", 0)))
if use_mkl_packed:
self.layer_type = mkl_packed_*
```
All related `layer_type`s will start with *mkl_packed_*; this is guaranteed when the `MKLPacked*Layer`s register their layers, so as to distinguish them.
### Benchmarking
Corresponding scripts will be added to test and compare network performance before and after using the MKL Packed recurrent layers.
## References
1. [Introducing the new Packed APIs for GEMM](https://software.intel.com/en-us/articles/introducing-the-new-packed-apis-for-gemm)
2. [DeepSpeech2 on PaddlePaddle](https://github.com/PaddlePaddle/DeepSpeech#deepspeech2-on-paddlepaddle)
...@@ -208,4 +208,3 @@ if use_mkldnn
However, in PaddlePaddle, neither the layers before the refactoring nor the ops after the refactoring ever want to know anything about the next layer/op.
4. MKL-DNN's high-performance formats differ from PaddlePaddle's original `NCHW` (the cuDNN part of PaddlePaddle also uses `NCHW`, so this problem does not exist there).
Therefore a conversion method needs to be introduced, and the format should be converted only when necessary, in order to better exploit MKL-DNN's performance.
# Design Doc: NCCL support in Paddle Fluid
## Abstract
This design doc covers the NCCL feature in Paddle. We propose an approach to support the NCCL library both on a single machine and across multiple machines. We wrap the NCCL primitives `Broadcast`, `Allreduce` and `Reduce` as operators to utilize multi-GPU power from one script.
## Motivation
[NCCL](https://developer.nvidia.com/nccl) is an NVIDIA library that supports multi-GPU communication and is optimized for NVIDIA GPUs. It provides routines such as all-gather, all-reduce, broadcast, reduce and reduce-scatter that can achieve high bandwidth over PCIe and the NVLink high-speed interconnect. With the NCCL library, we can easily accelerate training in parallel.
- Pros
1. Easily plugs in with the [NCCL2](https://developer.nvidia.com/nccl) library.
1. High performance on NVIDIA GPUs.
1. MPI-like primitives, which have a low learning cost for users.
- Cons
1. Designed only for NVIDIA GPUs, so it is not a general multi-device solution.
1. Although NCCL1 is open-sourced under the BSD license, NCCL2 is no longer open source.
At the beginning of training, the framework needs to distribute the same parameters to every GPU, and merge the gradients whenever the user requires.
As a result, during training we need peer-to-peer copies between different GPUs, aggregation of gradients/parameters from the GPUs, and broadcasting of parameters to the GPUs. Every GPU only needs to run the operator with the correct place information.
Besides, it needs interfaces to synchronize model updates across the different GPU cards.
## Implementation
As mentioned above, we wrap the NCCL routines as several kinds of operators. Note that NCCL needs to create a communicator between the GPUs at the beginning, so an NCCLInit operator is created.
### Transpiler
To be compatible with [parameter server design doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/ops/dist_train.md), the transpiler compiles the user defined operation graph into sub-graphs to be executed on different devices.
1. The user-defined model will be a single-device program.
2. Broadcast/Reduce operators between GPUs will be inserted into the program; in the multi-node case, `Send` and `Recv` operators may also be inserted.
*Broadcast, AllReduce in a single machine. And Broadcast, AllReduce, [Send, Recv](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/ops/dist_train.md#graph-converter) in multiple machines*
<img src="images/multigpu_before_convert.png" width="300"/>
After compiling, the graph is as shown below:
<img src="images/multigpu_allreduce.png" width="1000"/>
Operators are added to the sub-graphs. Every GPU is assigned a role, `rank0`, `rank1`, etc.
- **Broadcast**. The Broadcast operator distributes an initialized parameter from the GPU that owns it, e.g., the `rank0` GPU, to all the GPUs.
- **AllReduce**. The AllReduce operator synchronizes parameters/gradients between the GPUs. AllReduce is implemented with the ring-based communication method, avoiding the bottleneck of a single GPU.
Note that the AllReduce operator forces the GPUs to synchronize at that point. Whether the whole training process runs in asynchronous or synchronous mode depends on where the AllReduce points are placed in the graph.
As shown in the picture, each GPU computes the gradient of `W`; the following `AllReduce` operator accumulates `dW` over the full batch of data, and then each GPU runs the optimization process individually and applies the gradient to its `W`.
- **AllReduce**
Note that our AllReduce operator is a ring-based AllReduce implementation. If we used the NCCL2 AllReduce primitive, every GPU would optimize the full batch of data, wasting (n-1) GPUs' compute resources. In addition, NCCL2's built-in AllReduce only utilizes the communication resources during synchronization, and updating the gradient becomes a subsequent phase. In fact, we can amortize the gradient-update time cost into the communication phase. The process is:
1. Every parameter has its root card. That card is responsible for aggregating the gradients from the GPUs.
2. The whole model's parameters are hashed to different root cards, ensuring load balance between the GPUs.
3. Each card sends its partial result for the parameter to its logical neighbor. After one round, the parameter's root card has aggregated the full gradient.
4. The root card then optimizes the parameter.
5. The root card sends its optimized result to its neighbor, and each neighbor forwards the parameter to the next one.
6. The synchronization round finishes.
The total time cost is 2 * (n-1) * per-parameter-send-time, so we reach the goal of amortizing the update time into the communication phase.
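As a sanity check of the send count, here is a toy, sequential C++ simulation of this schedule for a single parameter (the ring size, gradient values and SGD step are made up for illustration, not Paddle code):

```cpp
// Simulate the ring schedule above for one parameter: (n-1) sends move
// partial gradient sums around the ring to the root card, and (n-1) more
// sends carry the optimized parameter back around, 2*(n-1) sends in total.
#include <iostream>
#include <vector>

int main() {
  const int n = 4;                         // number of GPUs in the ring
  const int root = 0;                      // this parameter's root card
  std::vector<float> grad = {1, 2, 3, 4};  // each card's local dW
  int sends = 0;

  // Phase 1: partial sums travel around the ring toward the root card.
  int card = (root + 1) % n;
  float partial = grad[card];
  while (card != root) {
    int next = (card + 1) % n;
    ++sends;                               // card sends its partial sum
    if (next != root) partial += grad[next];
    card = next;
  }
  float full_grad = partial + grad[root];  // root now holds the full dW

  // Phase 2: the root optimizes, then the result circles back to all cards.
  float updated_param = -0.1f * full_grad; // toy SGD step from param = 0
  sends += n - 1;                          // forwarded neighbor-to-neighbor

  std::cout << "full gradient: " << full_grad       // 10
            << ", updated param: " << updated_param // -1
            << ", sends: " << sends << "\n";        // 2*(n-1) = 6
  return 0;
}
```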
# Design Doc: Execute the Program with Multi CPU
## Abstract
This design doc proposes an approach to run a user-defined Op graph
on multiple CPUs: we will use an automatic transpiler to convert the user-defined
Op graph to a multi-CPU Op graph, and use the `ParallelDo` Op to run the graph.
## Transpiler
<img src="src/multi-threads/single-thread@3x.png" width="300">
After conversion:
<img src="src/multi-threads/multi-threads@3x.png" width="1000">
## Implementation
- The `Multi-CPU Transpiler` will convert the graph to a multi-CPU graph,
  which will be executed with multiple threads.
- `BlockingCounter` will `Init`/`Decrement` an atomic counter, and a blocking `Wait`
  waits for the atomic counter to become `0` (a minimal sketch appears after this list):
```cpp
BlockingCounter bc(thread_count);
for (int i = 0; i < thread_count; ++i) {
  thread_pool->Start([&bc] { bc.DecrementCount(); });
}
bc.Wait();
```
- `ParallelDo` Operator
  - Initialize a thread pool, which is a singleton.
  - Use a block id as the input, and run the specified Block in an independent scope per thread,
    with multiple threads.
  - Initialize a `BlockingCounter` instance and wait until all threads are done.
- The `Split` Operator will split the input Tensor into a TensorArray.
- `Merge` merges all the gradients calculated in different threads
  with a `mean/sum/max/min...` method, and then runs the Optimizer Op to optimize `W`.
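A minimal sketch of the `BlockingCounter` mentioned above, assuming a plain mutex plus condition-variable implementation rather than Paddle's actual source:

```cpp
// Counter that blocks Wait() until DecrementCount() has been called
// `count` times; one waiter, many worker threads.
#include <condition_variable>
#include <mutex>

class BlockingCounter {
 public:
  explicit BlockingCounter(int count) : count_(count) {}  // Init

  void DecrementCount() {
    std::lock_guard<std::mutex> guard(mu_);
    if (--count_ == 0) cv_.notify_all();  // wake up the waiter
  }

  void Wait() {  // block until the counter reaches 0
    std::unique_lock<std::mutex> lock(mu_);
    cv_.wait(lock, [this] { return count_ == 0; });
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  int count_;
};
```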
## TODO
- Improve the optimizer stage with multi-threading, since we could
  assign the parameters to different threads and execute the
  optimizer with multiple threads.
## Background
Every operator has many kernels because there are multiple data types, places, and data layouts that Fluid supports. We use the `KernelType` to describe the kernel types that operators can hold.
The `KernelType` is as follows.
```
struct KernelType {
Place place_;
DataType data_type_;
LayoutType layout_;
};
```
The `place_` is a descriptor of the device and the computational library, e.g., `MKLDNNPlace`, `CUDAPlace`.
The `data_type_` is the data type that this kernel performs on, e.g., `FP32`, `INT64`. Note that one kernel may have inputs with different data types; however, one of them will be the major `data_type`. For example, `cross_entropy` takes `int64` as its label, and `double`/`float` as its input logits and output cost. The major `data_type` of `cross_entropy` is `float`/`double`.
The `layout_` is useful for some computational libraries. One example is that MKL-DNN uses many kinds of layouts, such as `nChw8c`. Each kind of layout will invoke a different kernel.
## Problem
Ideally, we register a kernel for every operator and every kernel type. However, this is impracticable in the following situations:
1. Some operators, like CRF, are complicated and inefficient to implement on GPU. The CRF operator will only have a CPU kernel.
2. Some operators will take too much memory. It is better to force them onto the CPU. However, the rest of the operators in this neural network will be performed on the GPU, i.e., a model-parallel problem.
3. Some layouts and places are particular. One example is that MKL-DNN uses `nChw8c`, and no other library uses `nChw8c`.
Problems under these situations are similar. We can formalize this problem as follows:
We register kernels with types $KT = \{kt_1, kt_2, kt_3, ...\}$ for one operator. The inputs of this operator should be run on kernel type $kt_{?}$, where $kt_{?} \notin KT$. How do we cast the input of this operator from $kt_{?}$ to any kernel type in $KT$?
## Solution
It is clear that transforming the inputs of an operator to adapt to another kernel type is not related to the particular operator, so we should register these transformation methods as global methods.
We can infer a kernel type from the inputs of an operator. We call this kernel type the `actual kernel type`, which means it is the kernel type that the operator actually should be performed with.
We can also get a kernel type from 1) the configuration in the operator description (users may want to force the use of `MKL` for the `conv` operator), or 2) the place of the current executor (the executor is running on GPU). This kernel type is what we expect the operator to be performed with. We call this kernel type the `expected kernel type`.
We transform the input data from `actual` to `expected` if the expected kernel type is not the same as the actual kernel type.
The algorithm is described as follows:
```cpp
using DataTransformationFN = std::function<void(const Tensor& in, Tensor* out)>;
using KernelTypePair = std::pair<KernelType, KernelType>;
map<KernelTypePair, DataTransformationFN> g_data_transformation_;
void OpWithKernel::Run() {
vec<Tensor> inputs = ...
auto actual_kernel_type = GetActualKernelType(inputs);
// The expected kernel type is related to actual kernel type.
// For the most operators, the expected kernel type is as same as
// actual kernel type.
//
// So we pass `actual_kernel_type` as a parameter of
// GetExpectedKernelType
auto expect_kernel_type = GetExpectedKernelType(actual_kernel_type);
auto trans = g_data_transformation_[{actual_kernel_type, expect_kernel_type}];
kernel.run(trans(inputs));
}
```
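For illustration only, here is a hypothetical registration of one transformation function into the `g_data_transformation_` map above; the kernel types, layouts and reorder body are assumptions, reusing the pseudo-types from the algorithm sketch:

```cpp
// Sketch: register a layout transformation from MKL-DNN's nChw8c to plain
// NCHW for FP32 CPU tensors. All names reuse the pseudo-types declared in
// the algorithm above; the conversion body is left as a comment.
KernelType mkldnn_fp32{MKLDNNPlace(), DataType::FP32, LayoutType::nChw8c};
KernelType plain_fp32{CPUPlace(), DataType::FP32, LayoutType::NCHW};

g_data_transformation_[{mkldnn_fp32, plain_fp32}] =
    [](const Tensor& in, Tensor* out) {
      // reorder in's nChw8c data into out's NCHW buffer, e.g. via an
      // MKL-DNN reorder primitive
    };
```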
...@@ -128,7 +128,7 @@ PaddlePaddle Book是为用户和开发者制作的一个交互式的Jupyter Note
AVX is a CPU instruction set that can accelerate PaddlePaddle's computation. The latest
PaddlePaddle Docker images are compiled with AVX enabled by default, so if your computer
does not support AVX, you need to separately `build <./build_from_source_cn.html>`_ a no-avx version of PaddlePaddle.
The following command checks whether a Linux computer supports AVX:
......
...@@ -137,7 +137,7 @@ GPU driver installed before move on.
AVX is a kind of CPU instruction that can accelerate PaddlePaddle's calculations.
The latest PaddlePaddle Docker image turns AVX on by default, so, if your
computer doesn't support AVX, you'll probably need to
`build <./build_from_source_en.html>`_ with :code:`WITH_AVX=OFF`.
The following command will tell you whether your computer supports AVX.
......
import paddle.v2 as paddle
import numpy as np
paddle.init(use_gpu=False)
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(2))
y_predict = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear())
# load the model generated by training
with open('params_pass_90.tar', 'r') as f:
parameters = paddle.parameters.Parameters.from_tar(f)
# input multiple sets of data; the inference results are output as an array
i = [[[1, 2]], [[3, 4]], [[5, 6]]]
print paddle.infer(output_layer=y_predict, parameters=parameters, input=i)
# Will print:
# [[ -3.24491572]
# [ -6.94668722]
# [-10.64845848]]
...@@ -26,6 +26,11 @@ def event_handler(event):
if event.batch_id % 1 == 0:
print "Pass %d, Batch %d, Cost %f" % (event.pass_id, event.batch_id,
event.cost)
# save a model every 10 passes
if isinstance(event, paddle.event.EndPass):
if event.pass_id % 10 == 0:
with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
trainer.save_parameter_to_tar(f)
# define training dataset reader
......
...@@ -147,4 +147,9 @@ PaddlePaddle支持不同类型的输入数据,主要包括四种类型,和
.. literalinclude:: src/train.py
:linenos:
To run inference with the model trained above, take one of the saved models, params_pass_90.tar, feed in the vectors to be predicted, and print the output:
.. literalinclude:: src/infer.py
:linenos:
For a practical application of linear regression, see `Chapter 1 <http://book.paddlepaddle.org/index.html>`_ of the PaddlePaddle Book.
...@@ -53,7 +53,7 @@ Kernel实现 | CPU、CUDA共享Kernel实现在`.h`文件中,否则,CPU
```cpp
class MulOpMaker : public framework::OpProtoAndCheckerMaker {
public:
MulOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor), 2D tensor of size (M x K)");
AddInput("Y", "(Tensor), 2D tensor of size (K x N)");
...@@ -82,7 +82,7 @@ The equation is: Out = X * Y
template <typename AttrType>
class ScaleOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ScaleOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input tensor of scale operator.").NotInGradient();
AddOutput("Out", "The output tensor of scale operator.").NotInGradient();
......
...@@ -50,7 +50,7 @@ First, define `ProtoMaker` to describe the Operator's input, output, and additio
```cpp
class MulOpMaker : public framework::OpProtoAndCheckerMaker {
public:
MulOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor), 2D tensor of size (M x K)");
AddInput("Y", "(Tensor), 2D tensor of size (K x N)");
...@@ -79,7 +79,7 @@ An additional example [`ScaleOp`](https://github.com/PaddlePaddle/Paddle/blob/de
template <typename AttrType>
class ScaleOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ScaleOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input tensor of scale operator.").NotInGradient();
AddOutput("Out", "The output tensor of scale operator.").NotInGradient();
......
...@@ -9,9 +9,6 @@
usage/cmd_parameter/index_cn.rst
usage/cluster/cluster_train_cn.md
usage/k8s/k8s_basis_cn.md
usage/k8s/k8s_cn.md
usage/k8s/k8s_distributed_cn.md
Development Standards
---------------------
......
...@@ -9,8 +9,6 @@ Usage
usage/cmd_parameter/index_en.rst
usage/cluster/cluster_train_en.md
usage/k8s/k8s_en.md
usage/k8s/k8s_aws_en.md
Development
------------
......
...@@ -6,10 +6,10 @@ Core: https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework
Operator: https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators
Optimizer: https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/optimizer
Memory: https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/memory
Platform: https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/platform
# Compile Time
The following **defines** the NN. The definition goes into this [protocol buffer](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto).
......
# Distributed Training
* [Overview](#概述)
* [Environment Preparation](#环境准备)
* [Launch Parameters](#启动参数说明)
* [Starting the Parameter Server](#启动参数服务器)
* [Starting Trainer Nodes](#启动计算节点)
* [Preparing the Dataset](#准备数据集)
* [Preparing the Training Program](#准备训练程序)
* [Using Distributed Compute Platforms or Tools](#使用分布式计算平台或工具)
* [Launching Cluster Jobs with Fabric](#使用fabric启动集群作业)
* [Preparing a Linux Cluster](#准备一个linux集群)
* [Launching a Cluster Job](#启动集群作业)
* [Killing a Cluster Job](#终止集群作业)
* [Checking Cluster Training Results](#检查集群训练结果)
* [Checking Model Output](#检查模型输出)
* [Submitting a Training Job on an OpenMPI Cluster](#在openmpi集群中提交训练作业)
* [Preparing an OpenMPI Cluster](#准备OpenMPI集群)
* [Launching a Cluster Job](#启动集群作业-1)
* [Submitting a Training Job on a Kubernetes Cluster](#在kubernetes集群中提交训练作业)
## Overview
This article introduces how to run distributed training with PaddlePaddle on different cluster frameworks. The distributed training architecture is shown in the figure below:
<img src="https://user-images.githubusercontent.com/13348433/31772175-5f419eca-b511-11e7-9db7-5231fe3d9ccb.png" width="500">
...@@ -32,10 +15,11 @@
When training a neural network with synchronous SGD, PaddlePaddle uses a synchronization barrier so that gradient submission and parameter updates are executed in order. With asynchronous SGD, parameters are updated without waiting for all trainers to submit their gradients, which greatly improves computational parallelism: the parameter servers do not depend on each other and receive gradients and update parameters in parallel; a parameter server does not wait for all trainers to submit their gradients before taking the next step; and the trainers do not depend on each other either, running model training in parallel. Note that although asynchronous SGD improves the parallelism of parameter updates, it cannot guarantee synchronized updates: at any moment the parameters stored on one parameter server may be newer than on another, so compared with synchronous SGD the gradients carry noise.
## Environment Preparation
1. Prepare your compute cluster. A compute cluster usually consists of a group of Linux servers (from a few machines to thousands). The servers are connected via a local area network (LAN), and each server has an IP address unique within the cluster (or a hostname resolvable via DNS). Each machine in the cluster is usually called a "node".
1. We need to install PaddlePaddle on every node of the cluster. If you want to enable GPU training, the corresponding GPU driver and CUDA also need to be installed on the nodes. For the various ways to install PaddlePaddle, see [build_and_install](http://www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/build_and_install/index_cn.html). We recommend the [Docker](http://www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/build_and_install/docker_install_cn.html) installation to get PaddlePaddle quickly.
After installation, run the command below to check the installed version (for the Docker installation, you can enter the Docker container first: `docker run -it paddlepaddle/paddle:[tag] /bin/bash`):
```bash
...@@ -63,12 +47,12 @@ $ paddle pserver --port=7164 --ports_num=1 --ports_num_for_sparse=1 --num_gradie
$ stdbuf -oL /usr/bin/nohup paddle pserver --port=7164 --ports_num=1 --ports_num_for_sparse=1 --num_gradient_servers=1 &> pserver.log
```
Parameter description

- port: **required, default 7164**, the starting port the pserver listens on; combined with ports_num it determines the total number of ports, listening on multiple ports starting from this one for communication.
- ports_num: **required, default 1**, the number of ports to listen on.
- ports_num_for_sparse: **required, default 1**, the number of ports used for communication of sparse-type parameters.
- num_gradient_servers: **required, default 1**, the total number of pservers in the current training job.
### Starting Trainer Nodes
Run the following command to start a trainer program written in Python (the file can have any name, e.g., train.py):
...@@ -105,16 +89,16 @@ paddle.init(
pservers="127.0.0.1")
```
Parameter description

- use_gpu: **optional, default False**, whether to enable GPU training.
- trainer_count: **required, default 1**, the total number of trainers in the current training job.
- port: **required, default 7164**, the port used to connect to the pserver.
- ports_num: **required, default 1**, the number of ports used to connect to the pserver.
- ports_num_for_sparse: **required, default 1**, the number of ports used for sparse-type parameter communication with the pserver.
- num_gradient_servers: **required, default 1**, the total number of pservers in the current training job.
- trainer_id: **required, default 0**, a unique ID for each trainer, an integer starting from 0.
- pservers: **required, default 127.0.0.1**, the list of IPs of the pservers started for the current training job; separate multiple IPs with ",".
### Preparing the Dataset
...@@ -171,7 +155,7 @@ test.txt-00002
- `my_lib.py`: user-defined library functions called by `train.py`, such as the PIL library.
- `word_dict.pickle`: the dictionary data file used in `train.py`.
- `train.py`: the training program; see [api_train_v2_cluster.py](https://github.com/PaddlePaddle/Paddle/tree/develop/doc/howto/usage/cluster/src/word2vec/api_train_v2_cluster.py) for reference code. ***Note:*** for this sample code, when using different distributed compute platforms you may need to modify the beginning of `train.py` (as below) to obtain the location of the training data and read the environment-variable configuration:
```python
cluster_train_file = "./train_data_dir/train/train.txt"
...@@ -195,91 +179,10 @@ PaddlePaddle可以使用多种分布式计算平台构建分布式计算任务
When training on a distributed compute platform, once the job is scheduled onto the cluster, the platform usually provides the parameters the job needs via APIs or environment variables, such as the node ID, IP, and the total number of job nodes.
## Running on Different Clusters
#### Preparing a Linux Cluster
Under the directory `paddle/scripts/cluster_train_v2/fabric/docker_cluster`, run `kubectl -f ssh_servers.yaml` to start a test cluster, and use `kubectl get po -o wide` to get the IP addresses of these nodes.
#### Launching a Cluster Job
`paddle.py` provides automated scripts to start all PaddlePaddle cluster processes on the different nodes. By default, all command-line options can be set as `paddle.py` command options, and `paddle.py` will transparently and automatically apply these options to the underlying PaddlePaddle processes.
`paddle.py` provides two special command options to make job launching convenient.
- `job_dispatch_package`: set it to the local `workspace` directory; it will be dispatched to all the nodes configured in `conf.py`. It helps reduce the burden for users who frequently modify and access workspace files; otherwise frequent multi-node workspace deployment can be troublesome.
- `job_workspace`: set it to an already deployed workspace directory; `paddle.py` will skip the dispatch stage and directly launch the cluster job on all nodes. It helps reduce dispatch latency.
`cluster_train/run.sh` provides a sample command to run the `doc/howto/usage/cluster/src/word2vec` cluster job; just modify `job_dispatch_package` and `job_workspace` with your own directories, then:
```
sh run.sh
```
The cluster job will start within a few seconds.
#### Killing a Cluster Job
`paddle.py` can capture the `Ctrl + C` SIGINT signal to automatically terminate all the processes it launched. Just interrupt the `paddle.py` task to kill the cluster job. You can also kill it manually if the program crashes.
#### Checking Cluster Training Results
For details, check the logs in $workspace/log; every node has the same log structure.
`paddle_trainer.INFO`
Provides almost all internal output logs of training, the same as local training. Check the runtime model convergence here.
`paddle_pserver2.INFO`
Provides the pserver run logs, which help diagnose distributed errors.
`server.log`
Provides the stderr and stdout of the parameter server process. Check the error logs here if training fails.
`train.log`
Provides the stderr and stdout of the training process. Check the error logs here if training fails.
#### Checking Model Output
After the run completes, the model files are written to the `output` directory on node 0.
The `nodefile` in the workspace indicates the node ID of the current cluster job.
### Submitting a Training Job on an OpenMPI Cluster
#### Preparing an OpenMPI Cluster
Run the following commands to start a 3-node OpenMPI cluster plus one "head" node:
```bash
cd paddle/scripts/cluster_train_v2/openmpi/docker_cluster
kubectl create -f head.yaml
kubectl create -f mpi-nodes.yaml
```
Then you can ssh into every OpenMPI node from the head node without a password.
#### Launching a Cluster Job
You can follow the steps below to submit a Paddle training job on the OpenMPI cluster:
```bash
# get the IP addresses of the head and worker nodes
kubectl get po -o wide
# save the worker nodes' IP addresses into the machines file
kubectl get po -o wide | grep nodes | awk '{print $6}' > machines
# copy the necessary files to the head node
scp -i ssh/id_rsa.mpi.pub machines prepare.py train.py start_mpi_train.sh tutorial@[headIP]:~
# log in to the head node via ssh
ssh -i ssh/id_rsa.mpi.pub tutorial@[headIP]
# --------------- run all the following steps on the head node ---------------
# prepare the training data
python prepare.py
# copy the training program and dictionary file to every MPI node
cat machines | xargs -i scp word_dict.pickle train.py start_mpi_train.sh machines {}:/home/tutorial
# create the log directory
mpirun -hostfile machines -n 3 mkdir /home/tutorial/logs
# copy the training data to the respective nodes
scp train.txt-00000 test.txt-00000 [node1IP]:/home/tutorial
scp train.txt-00001 test.txt-00001 [node2IP]:/home/tutorial
scp train.txt-00002 test.txt-00002 [node3IP]:/home/tutorial
# start the training job
mpirun -hostfile machines -n 3 /home/tutorial/start_mpi_train.sh
```
### Submitting a Training Job on a Kubernetes Cluster
- [Fabric cluster](fabric_cn.md)
- [OpenMPI cluster](openmpi_cn.md)
- [Kubernetes single machine](k8s_cn.md)
- [Kubernetes distributed](k8s_distributed_cn.md)
- [Kubernetes cluster training on AWS](k8s_aws_cn.md)
# Distributed Training
* [Introduction](#introduction)
* [Preparations](#preparations)
* [Command-line arguments](#command-line-arguments)
* [Starting parameter server](#starting-parameter-server)
* [Starting trainer](#starting-trainer)
* [Prepare Training Dataset](#prepare-training-dataset)
* [Prepare Training program](#prepare-training-program)
* [Use cluster platforms or cluster management tools](#use-cluster-platforms-or-cluster-management-tools)
* [Cluster Training Using Fabric](#cluster-training-using-fabric)
* [Prepare a Linux cluster](#prepare-a-linux-cluster)
* [Launching Cluster Job](#launching-cluster-job)
* [Kill Cluster Job](#kill-cluster-job)
* [Check Cluster Training Result](#check-cluster-training-result)
* [Check Model Output](#check-model-output)
* [Cluster Training Using OpenMPI](#cluster-training-using-openmpi)
* [Prepare an OpenMPI cluster](#prepare-an-openmpi-cluster)
* [Launching Cluster Job](#launching-cluster-job-1)
* [Cluster Training Using Kubernetes](#cluster-training-using-kubernetes)
## Introduction
...@@ -35,7 +16,7 @@ When training with synchronize SGD, PaddlePaddle uses an internal "synchronize b
## Preparations
1. Prepare your computer cluster. It's normally a bunch of Linux servers connected by LAN. Each server will be assigned a unique IP address. The computers in the cluster can be called "nodes".
2. Install PaddlePaddle on every node. If you are going to take advantage of GPU cards, you'll also need to install the proper driver and CUDA libraries. To install PaddlePaddle please read [this build and install](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/build_and_install/index_en.html) document. We strongly recommend using [Docker installation](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/build_and_install/docker_install_en.html).
After installation, you can check the version by typing the below command (run a docker container if using docker: `docker run -it paddlepaddle/paddle:[tag] /bin/bash`):
...@@ -67,12 +48,12 @@ If you wish to run parameter servers in background, and save a log file, you can
$ stdbuf -oL /usr/bin/nohup paddle pserver --port=7164 --ports_num=1 --ports_num_for_sparse=1 --num_gradient_servers=1 &> pserver.log
```
Parameter Description

- port: **required, default 7164**, the port the parameter server listens on. If ports_num is greater than 1, the parameter server will listen on multiple ports for higher network throughput.
- ports_num: **required, default 1**, the total number of ports to listen on.
- ports_num_for_sparse: **required, default 1**, the number of ports that serve sparse parameter updates.
- num_gradient_servers: **required, default 1**, the total number of gradient servers.
### Starting trainer
Type the command below to start the trainer (name the file whatever you want, like "train.py"):
...@@ -111,16 +92,16 @@ paddle.init(
pservers="127.0.0.1")
```
Parameter Description

- use_gpu: **optional, default False**, set to "True" to enable GPU training.
- trainer_count: **required, default 1**, the total count of trainers in the training job.
- port: **required, default 7164**, the port to connect to the parameter server.
- ports_num: **required, default 1**, the number of ports for communication.
- ports_num_for_sparse: **required, default 1**, the number of ports for sparse-type calculation.
- num_gradient_servers: **required, default 1**, the total number of gradient servers.
- trainer_id: **required, default 0**, an ID for every trainer, starting from 0.
- pservers: **required, default 127.0.0.1**, the list of IPs of parameter servers, separated by ",".
### Prepare Training Dataset
...@@ -178,7 +159,7 @@ Your workspace may looks like:
- `my_lib.py`: user defined libraries, like PIL libs. This is optional.
- `word_dict.pickle`: dict file for training word embedding.
- `train.py`: training program. Sample code: [api_train_v2_cluster.py](https://github.com/PaddlePaddle/Paddle/tree/develop/doc/howto/usage/cluster/src/word2vec/api_train_v2_cluster.py). ***NOTE:*** You may need to modify the head part of `train.py` when using a different cluster platform to retrieve configuration environment variables:
```python
cluster_train_file = "./train_data_dir/train/train.txt"
...@@ -202,92 +183,9 @@ We'll introduce cluster job management on these platforms. The examples can be f
These cluster platforms provide APIs or environment variables for the training processes when the job is dispatched to different nodes, such as the node ID, IP, or total number of nodes.
## Use different clusters
#### Prepare a Linux cluster
Running `kubectl -f ssh_servers.yaml` under the directory `paddle/scripts/cluster_train_v2/fabric/docker_cluster` will launch a demo cluster. Run `kubectl get po -o wide` to get the IP addresses of these nodes.
#### Launching Cluster Job
`paddle.py` provides automated scripts to start all PaddlePaddle cluster processes on different nodes. By default, all command line options can be set as `paddle.py` command options and `paddle.py` will transparently and automatically set these options for the lower-level PaddlePaddle processes.
`paddle.py` provides two special command options for easy job launching.
- `job_dispatch_package`: set it to a local `workspace` directory; it will be dispatched to all nodes which are set in `conf.py`. It is helpful when frequently manipulating workspace files; otherwise, frequent multi-node workspace deployment is very annoying.
- `job_workspace`: set it to an already deployed workspace directory; `paddle.py` will skip the dispatch stage and directly launch the cluster job on all nodes. It helps reduce heavy
dispatch latency.
`cluster_train/run.sh` provides a command line sample to run the `demo/recommendation` cluster job; just modify `job_dispatch_package` and `job_workspace` with your defined directory, then:
```
sh run.sh
```
The cluster job will start in several seconds.
#### Kill Cluster Job
`paddle.py` can capture the `Ctrl + C` SIGINT signal to automatically kill all processes launched by it. So just stop `paddle.py` to kill the cluster job. You should manually kill the job if the program crashes.
#### Check Cluster Training Result
Check the logs in $workspace/log for details; each node has the same log structure.
`paddle_trainer.INFO`
It provides almost all internal output logs for training, the same as local training. Check runtime model convergence here.
`paddle_pserver2.INFO`
It provides the parameter server running log, which could help to diagnose distributed errors.
`server.log`
It provides the stderr and stdout of the parameter server process. Check the error log if training crashes.
`train.log`
It provides the stderr and stdout of the trainer process. Check the error log if training crashes.
#### Check Model Output
After one pass finishes, model files will be written to the `output` directory on node 0.
`nodefile` in the workspace indicates the node ID of the current cluster job.
### Cluster Training Using OpenMPI
#### Prepare an OpenMPI cluster
Run the following commands to start a 3-node MPI cluster and one "head" node.
```bash
cd paddle/scripts/cluster_train_v2/openmpi/docker_cluster
kubectl create -f head.yaml
kubectl create -f mpi-nodes.yaml
```
Then you can log in to every OpenMPI node using ssh without entering a password.
#### Launching Cluster Job
Follow these steps to launch a PaddlePaddle training job on the OpenMPI cluster:
```bash
# find out node IP addresses
kubectl get po -o wide
# generate a "machines" file containing node IP addresses
kubectl get po -o wide | grep nodes | awk '{print $6}' > machines
# copy necessary files onto "head" node
scp -i ssh/id_rsa.mpi.pub machines prepare.py train.py start_mpi_train.sh tutorial@[headIP]:~
# login to head node using ssh
ssh -i ssh/id_rsa.mpi.pub tutorial@[headIP]
# --------------- in head node ---------------
# prepare training data
python prepare.py
# copy training data and dict file to MPI nodes
cat machines | xargs -i scp word_dict.pickle train.py start_mpi_train.sh machines {}:/home/tutorial
# create a directory for storing log files
mpirun -hostfile machines -n 3 mkdir /home/tutorial/logs
# copy training data to every node
scp train.txt-00000 test.txt-00000 [node1IP]:/home/tutorial
scp train.txt-00001 test.txt-00001 [node2IP]:/home/tutorial
scp train.txt-00002 test.txt-00002 [node3IP]:/home/tutorial
# start the job
mpirun -hostfile machines -n 3 /home/tutorial/start_mpi_train.sh
```
### Cluster Training Using Kubernetes
- [fabric](fabric_en.md)
- [openmpi](openmpi_en.md)
- [kubernetes](k8s_en.md)
- [kubernetes on AWS](k8s_aws_en.md)
# Cluster Training Using Fabric
## Prepare a Linux Cluster
Under the directory `paddle/scripts/cluster_train_v2/fabric/docker_cluster`, run `kubectl -f ssh_servers.yaml` to start a test cluster, and use `kubectl get po -o wide` to get the IP addresses of these nodes.
## Launch a Cluster Job
`paddle.py` provides automated scripts to start all PaddlePaddle cluster processes on the different nodes. By default, all command-line options can be set as `paddle.py` command options, and `paddle.py` will transparently and automatically apply these options to the underlying PaddlePaddle processes.
`paddle.py` provides two special command options to make job launching convenient.
- `job_dispatch_package`: set it to the local `workspace` directory; it will be dispatched to all the nodes configured in `conf.py`. It helps reduce the burden for users who frequently modify and access workspace files; otherwise frequent multi-node workspace deployment can be troublesome.
- `job_workspace`: set it to an already deployed workspace directory; `paddle.py` will skip the dispatch stage and directly launch the cluster job on all nodes. It helps reduce dispatch latency.
`cluster_train/run.sh` provides a sample command to run the `doc/howto/usage/cluster/src/word2vec` cluster job; just modify `job_dispatch_package` and `job_workspace` with your own directories, then:
```
sh run.sh
```
The cluster job will start within a few seconds.
## Kill a Cluster Job
`paddle.py` can capture the `Ctrl + C` SIGINT signal to automatically terminate all the processes it launched. Just interrupt the `paddle.py` task to kill the cluster job. You can also kill it manually if the program crashes.
## Check Cluster Training Results
For details, check the logs in $workspace/log; every node has the same log structure.
`paddle_trainer.INFO`
Provides almost all internal output logs of training, the same as local training. Check the runtime model convergence here.
`paddle_pserver2.INFO`
Provides the pserver run logs, which help diagnose distributed errors.
`server.log`
Provides the stderr and stdout of the parameter server process. Check the error logs here if training fails.
`train.log`
Provides the stderr and stdout of the training process. Check the error logs here if training fails.
## Check Model Output
After the run completes, the model files are written to the `output` directory on node 0.
The `nodefile` in the workspace indicates the node ID of the current cluster job.
# Cluster Training Using Fabric
## Prepare a Linux cluster
Running `kubectl -f ssh_servers.yaml` under the directory `paddle/scripts/cluster_train_v2/fabric/docker_cluster` will launch a demo cluster. Run `kubectl get po -o wide` to get the IP addresses of these nodes.
## Launching Cluster Job
`paddle.py` provides automated scripts to start all PaddlePaddle cluster processes on different nodes. By default, all command line options can be set as `paddle.py` command options and `paddle.py` will transparently and automatically set these options for the lower-level PaddlePaddle processes.
`paddle.py` provides two special command options for easy job launching.
- `job_dispatch_package`: set it to a local `workspace` directory; it will be dispatched to all nodes which are set in `conf.py`. It is helpful when frequently manipulating workspace files; otherwise, frequent multi-node workspace deployment is very annoying.
- `job_workspace`: set it to an already deployed workspace directory; `paddle.py` will skip the dispatch stage and directly launch the cluster job on all nodes. It helps reduce heavy
dispatch latency.
`cluster_train/run.sh` provides a command line sample to run the `demo/recommendation` cluster job; just modify `job_dispatch_package` and `job_workspace` with your defined directory, then:
```
sh run.sh
```
The cluster job will start in a few seconds.
## Kill Cluster Job
`paddle.py` captures the `Ctrl + C` SIGINT signal and automatically kills all processes launched by it, so just stopping `paddle.py` kills the cluster job. You should kill the job manually if the program crashes; see the sketch below.
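If `paddle.py` itself has died and can no longer clean up, a blunt fallback is to kill the leftover processes on each node by name; a sketch, assuming the default process names that also appear in the log files described below:
```bash
# run on every node that still has orphaned training processes
pkill -f paddle_trainer
pkill -f paddle_pserver2
```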
## Check Cluster Training Result
Check the logs in `$workspace/log` for details; each node has the same log structure.
`paddle_trainer.INFO`
It provides almost all of training's internal output logs, the same as in local training. Check runtime model convergence here.
`paddle_pserver2.INFO`
It provides the parameter server's running log, which can help diagnose distributed errors.
`server.log`
It provides the stderr and stdout of the parameter server process. Check its error log if training crashes.
`train.log`
It provides the stderr and stdout of the trainer process. Check its error log if training crashes.
## Check Model Output
After one pass finishes, the model files will be written to the `output` directory on node 0.
`nodefile` in the workspace indicates the node ID of the current cluster job.
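To retrieve the trained model for local use, copy it from node 0; a minimal sketch (`node0` is a placeholder for that node's address, and `$workspace` is your deployed workspace path):
```bash
# pull the pass directories written by the trainer on node 0
scp -r node0:$workspace/output ./trained_model
```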
k8s_aws_en.md
@@ -493,7 +493,7 @@ spec:
  spec:
    containers:
    - name: paddle-data
      image: paddlepaddle/paddle-tutorial:k8s_data
      imagePullPolicy: Always
      volumeMounts:
      - mountPath: "/efs"
@@ -522,7 +522,7 @@ NAME DESIRED SUCCESSFUL AGE
paddle-data 1 1 6m
```
Data preparation is done by the docker image `paddlepaddle/paddle-tutorial:k8s_data`; see [here](src/k8s_data/README.md) for how to build this docker image and its source code.
#### Start Training
@@ -545,7 +545,7 @@ spec:
        claimName: efsvol
      containers:
      - name: trainer
        image: paddlepaddle/paddle-tutorial:k8s_train
        command: ["bin/bash", "-c", "/root/start.sh"]
        env:
        - name: JOB_NAME
@@ -617,7 +617,7 @@ kubectl --kubeconfig=kubeconfig log -f POD_NAME
Run `kubectl --kubeconfig=kubeconfig describe job paddle-cluster-job` to check training job status. It will complete in around 20 minutes.
The details of starting the `pserver` and `trainer` are hidden inside the docker image `paddlepaddle/paddle-tutorial:k8s_train`; see [here](src/k8s_train/README.md) for how to build the docker image and its source code.
#### Inspect Training Output
...
# Single-Node Training on Kubernetes
In this document, we describe how to launch a single-node, CPU-only PaddlePaddle training job on a Kubernetes cluster. In the next document, we will describe how to launch a distributed training job.
## Build the Docker Image
In a full-featured Kubernetes cluster, a distributed file system such as Ceph is usually installed to store the training data, so that every process of a distributed PaddlePaddle training job can read its data from Ceph. In this example we only demonstrate a single-node job, so we can simplify the environment requirements and put the training data directly into PaddlePaddle's Docker image. To do so, we need to build a PaddlePaddle image that contains the training data.
The `paddlepaddle/paddle:cpu-demo-latest` image contains the PaddlePaddle source code and demos. (Note that the default PaddlePaddle production image `paddlepaddle/paddle:latest` does not include the source code; the various PaddlePaddle images are listed in the [Docker Installation Guide](http://paddlepaddle.org/docs/develop/documentation/zh/getstarted/build_and_install/docker_install_cn.html).) Below we use this image to download the data into a Docker container, and then save the container, now holding the training data, as a new image.
### Run the Container
```
$ docker run --name quick_start_data -it paddlepaddle/paddle:cpu-demo-latest
```
### Download the Data
@@ -103,7 +104,7 @@ spec:
      restartPolicy: Never
```
### Create the PaddlePaddle Job
Create the Kubernetes Job from the YAML file written above; a sketch of the command follows.
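The exact command is truncated in this diff; a minimal sketch, assuming the YAML above was saved as `job.yaml` (the file name is an assumption):
```bash
kubectl create -f job.yaml
# confirm that the job and its pod were created
kubectl get jobs
kubectl get pods
```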
...
# Distributed Training on Kubernetes
The previous article described how to launch a single-node PaddlePaddle training job (Job) on a Kubernetes cluster. In this article, we describe how to run distributed PaddlePaddle training on a Kubernetes cluster. For PaddlePaddle distributed training in general, the article [Cluster Training](http://www.paddlepaddle.org/docs/develop/documentation/zh/howto/usage/cluster/cluster_train_cn.html) describes an approach that dispatches tasks remotely over SSH; in contrast, this article presents a scheme that quickly builds a PaddlePaddle container cluster on the Kubernetes container-management platform and performs distributed training there.
## Overall Design
@@ -28,7 +26,7 @@ The PaddlePaddle image needs to provide the runtime for the `paddle pserver` and `paddle train` processes
- Copy the training files into the container
- Generate the startup arguments for the `paddle pserver` and `paddle train` processes, and launch the training
Since the official image `paddlepaddle/paddle:latest` already contains the PaddlePaddle executables but not the functionality above, we can build on it, add the startup scripts, and produce a new image that does this work. See the image's [*Dockerfile*](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/usage/cluster/src/k8s_train/Dockerfile).
```bash
$ cd doc/howto/usage/k8s/src/k8s_train
@@ -62,7 +60,7 @@ spec:
  hostNetwork: true
  containers:
  - name: paddle-data
    image: paddlepaddle/paddle-tutorial:k8s_data
    imagePullPolicy: Always
    volumeMounts:
    - mountPath: "/mnt"
@@ -149,20 +147,19 @@ spec:
In the file, `name` under `metadata` is the name of this job. The `parallelism` and `completions` fields mean that this job starts 3 PaddlePaddle nodes at the same time, and the job only counts as successfully finished when 3 pods have trained successfully and exited. A volume named `jobpath` is then declared, representing the host directory `/home/work/mfs`; in the `containers` section this directory is mounted at `/home/jobpath` inside the container, so the container's `/home/jobpath` becomes shared storage, and files placed there are actually saved to MFS.
The `env` field holds the container's environment variables; we pass some of `paddle`'s runtime parameters into the container this way:
- JOB_PATH: the mount path of the shared storage
- JOB_NAME: the name of the job
- TRAIN_CONFIG_DIR: the directory holding this run's training files; combined with JOB_PATH and JOB_NAME it locates the files this training run needs
- CONF_PADDLE_NIC: the `--nics` argument required by the `paddle pserver` process, i.e. the network interface name
- CONF_PADDLE_PORT: the `--port` argument of `paddle pserver`
- CONF_PADDLE_PORTS_NUM: the number of ports for dense updates, i.e. the `--ports_num` argument
- CONF_PADDLE_PORTS_NUM_SPARSE: the number of ports for sparse updates, i.e. the `--ports_num_for_sparse` argument
- CONF_PADDLE_GRADIENT_NUM: the number of training nodes, i.e. the `--num_gradient_servers` argument
For detailed descriptions of these parameters, see [here](http://www.paddlepaddle.org/docs/develop/documentation/zh/howto/usage/cmd_parameter/detail_introduction_cn.html).
After writing the YAML file, you can create the job with the Kubernetes command-line tool.
...
# PaddlePaddle On Kubernetes
In this article, we will introduce how to run a PaddlePaddle training job on a single-CPU machine using Kubernetes. In the next article, we will introduce how to run a PaddlePaddle training job on a distributed cluster.
## Build Docker Image
In a distributed Kubernetes cluster, we will use Ceph or another distributed storage system to store training-related data, so that all processes of a PaddlePaddle training job can retrieve data from Ceph. In this example, we only demo a training job on a single machine. To simplify the environment requirements, we put the training data directly into the PaddlePaddle Docker image, so we need to create a PaddlePaddle Docker image that already includes the training data.
The Docker image `paddlepaddle/paddle:cpu-demo-latest` has the PaddlePaddle source code and demos. (Caution: the default PaddlePaddle Docker image `paddlepaddle/paddle:latest` doesn't include the source code; PaddlePaddle's different Docker images are listed here: [Docker Installation Guide](http://paddlepaddle.org/docs/develop/documentation/zh/getstarted/build_and_install/docker_install_en.html).) So we run this Docker image, download the training data inside it, and then commit the whole container as a new Docker image.
### Run Docker Container
```
$ docker run --name quick_start_data -it paddlepaddle/paddle:cpu-demo-latest
```
### Download Training Data
@@ -67,7 +76,7 @@ $ docker commit quick_start_data mypaddle/paddle:quickstart
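One step that is easy to miss: an image committed locally, such as `mypaddle/paddle:quickstart`, is only visible on the machine that committed it. For the Kubernetes nodes to pull it, push it to a registry the cluster can reach; a sketch (the registry namespace is whatever you control):
```bash
# log in to your registry first if needed: docker login
docker push mypaddle/paddle:quickstart
```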
## Use Kubernetes For Training
We will use a Kubernetes job for the training process; the following steps show how to do the training with Kubernetes.
### Create Yaml Files
@@ -99,7 +108,7 @@ spec:
      restartPolicy: Never
```
### Start PaddlePaddle Job
Use the above YAML file to start the Kubernetes job; a sketch of the command follows.
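A minimal sketch, assuming the YAML above was saved as `paddle.yaml` (the file name is an assumption):
```bash
kubectl create -f paddle.yaml
# watch the pod until it reaches the Running state
kubectl get pods -w
```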
...
# Submitting a Training Job on an OpenMPI Cluster
## Prepare the OpenMPI cluster
Run the following commands to start a 3-node OpenMPI cluster and one "head" node:
```bash
cd paddle/scripts/cluster_train_v2/openmpi/docker_cluster
kubectl create -f head.yaml
kubectl create -f mpi-nodes.yaml
```
Then you can log in from the head node to every OpenMPI node over ssh without a password.
## Launching a cluster job
Follow the steps below to submit a paddle training job on the OpenMPI cluster:
```bash
# get the IP addresses of the head and worker nodes
kubectl get po -o wide
# save the worker nodes' IP addresses into the "machines" file
kubectl get po -o wide | grep nodes | awk '{print $6}' > machines
# copy the necessary files to the head node
scp -i ssh/id_rsa.mpi.pub machines prepare.py train.py start_mpi_train.sh tutorial@[headIP]:~
# log in to the head node via ssh
ssh -i ssh/id_rsa.mpi.pub tutorial@[headIP]
# --------------- all commands below run on the head node ---------------
# prepare the training data
python prepare.py
# copy the training program and dictionary file to every MPI node
cat machines | xargs -i scp word_dict.pickle train.py start_mpi_train.sh machines {}:/home/tutorial
# create the log directory
mpirun -hostfile machines -n 3 mkdir /home/tutorial/logs
# copy each shard of the training data to its node
scp train.txt-00000 test.txt-00000 [node1IP]:/home/tutorial
scp train.txt-00001 test.txt-00001 [node2IP]:/home/tutorial
scp train.txt-00002 test.txt-00002 [node3IP]:/home/tutorial
# start the training job
mpirun -hostfile machines -n 3 /home/tutorial/start_mpi_train.sh
```
# Cluster Training Using OpenMPI
## Prepare an OpenMPI cluster
Run the following commands to start a 3-node MPI cluster and one "head" node:
```bash
cd paddle/scripts/cluster_train_v2/openmpi/docker_cluster
kubectl create -f head.yaml
kubectl create -f mpi-nodes.yaml
```
Then you can log in to every OpenMPI node over ssh without entering any password.
## Launching Cluster Job
Follow these steps to launch a PaddlePaddle training job on the OpenMPI cluster:
```bash
# find out node IP addresses
kubectl get po -o wide
# generate a "machines" file containing node IP addresses
kubectl get po -o wide | grep nodes | awk '{print $6}' > machines
# copy necessary files onto "head" node
scp -i ssh/id_rsa.mpi.pub machines prepare.py train.py start_mpi_train.sh tutorial@[headIP]:~
# login to head node using ssh
ssh -i ssh/id_rsa.mpi.pub tutorial@[headIP]
# --------------- in head node ---------------
# prepare training data
python prepare.py
# copy training data and dict file to MPI nodes
cat machines | xargs -i scp word_dict.pickle train.py start_mpi_train.sh machines {}:/home/tutorial
# create a directory for storing log files
mpirun -hostfile machines -n 3 mkdir /home/tutorial/logs
# copy training data to every node
scp train.txt-00000 test.txt-00000 [node1IP]:/home/tutorial
scp train.txt-00001 test.txt-00001 [node2IP]:/home/tutorial
scp train.txt-00002 test.txt-00002 [node3IP]:/home/tutorial
# start the job
mpirun -hostfile machines -n 3 /home/tutorial/start_mpi_train.sh
```
FROM paddlepaddle/paddle:latest
MAINTAINER zjsxzong89@gmail.com
...
FROM paddlepaddle/paddle:latest
COPY start.sh /root/
COPY start_paddle.py /root/
...
# Introduction to Kubernetes
[*Kubernetes*](http://kubernetes.io/) is Google's open-source container cluster management system. It provides mechanisms for application deployment, maintenance, and scaling, and makes it easy to manage containerized applications across machines. Kubernetes runs on physical or virtual machines and supports deployment to public clouds such as [AWS](http://kubernetes.io/docs/getting-started-guides/aws), [Azure](http://kubernetes.io/docs/getting-started-guides/azure/), and [GCE](http://kubernetes.io/docs/getting-started-guides/gce). Before introducing distributed training, you need a basic understanding of [Kubernetes](http://kubernetes.io/), so this section briefly introduces the Kubernetes concepts used in this article.
- [*Node*](http://kubernetes.io/docs/admin/node/): a worker node in a Kubernetes cluster, which can be a physical or virtual machine. A Kubernetes cluster consists of worker nodes and master nodes.
- [*Pod*](http://kubernetes.io/docs/user-guide/pods/): a group of one or more containers. A pod is Kubernetes' smallest scheduling unit, and all containers in one pod are scheduled onto the same node. Containers in a pod share the NET, PID, IPC, and UTS Linux namespaces; because they share the NET namespace, they use the same IP address and can talk to each other via *localhost*. Different pods reach each other by IP address.
- [*Job*](http://kubernetes.io/docs/user-guide/jobs/): describes a batch workload on Kubernetes. A job usually comprises one or more pods; once started, the job creates these pods and runs a program in each, and succeeds when the program exits with 0. Different retry policies can be configured for failures.
- [*Volume*](http://kubernetes.io/docs/user-guide/volumes/): a shared directory that all containers in a pod can access, and the way containers share files with their node. Files inside a container are ephemeral: when the container is destroyed for any reason, its files disappear with it. A volume makes such files persistent. Kubernetes supports many volume types, such as hostPath (a host directory), gcePersistentDisk, and awsElasticBlockStore.
- [*Namespaces*](https://kubernetes.io/docs/user-guide/namespaces/): every resource object created in Kubernetes (such as the pods and jobs above) belongs to a namespace. Within one namespace resource names are unique, while names in different namespaces may repeat; namespaces group objects logically for easier management. This article only uses the default namespace.
- [*PersistentVolume*](https://kubernetes.io/docs/user-guide/persistent-volumes/): combined with a [*PersistentVolumeClaim*](https://kubernetes.io/docs/user-guide/persistent-volumes/#persistentvolumeclaims), it describes external storage services as a uniform resource inside Kubernetes, which makes storage easier to manage and to reference from pods.
## Deploying a Kubernetes Cluster
Kubernetes offers many cluster deployment options, which this document does not repeat. Several common deployment methods are:
- [*minikube*](https://kubernetes.io/docs/getting-started-guides/minikube/): quickly starts a single-machine Kubernetes server locally, convenient for local validation and testing.
- [*kubeadm*](http://kubernetes.io/docs/getting-started-guides/kubeadm/): quickly deploys a cluster on different operating systems and hosts (bare-metal, AWS, GCE).
- [*AWS EC2*](https://kubernetes.io/docs/getting-started-guides/aws/): quickly deploys a cluster on AWS.
- [*Bare-Metal*](https://kubernetes.io/docs/getting-started-guides/centos/centos_manual_config/): deploys manually on physical machines.
See [this table](https://kubernetes.io/docs/getting-started-guides/#table-of-solutions) to choose a solution suitable for your scenario.
## Choosing a Storage Solution
Containers do not keep data generated at runtime: data produced by a job or application running in a container disappears when the container is destroyed. To run a distributed machine-learning training job, an external storage service is needed to hold the training data and the training output.
Common storage services include:
- [*NFS*](https://github.com/kubernetes/kubernetes/tree/master/examples/volumes/nfs): shares a directory on one machine's disk with other machines on the network. It is simple to deploy and configure and suitable for validating with small amounts of data, but provides no distributed storage, high availability, or redundancy. See [here](http://www.tecmint.com/how-to-setup-nfs-server-in-linux/) for how to deploy NFS.
- [*GlusterFS*](http://gluster.readthedocs.io/en/latest/Quick-Start-Guide/Quickstart/): a networked distributed file system; it can be used in Kubernetes following [this](https://github.com/kubernetes/kubernetes/tree/master/examples/volumes/glusterfs) example.
- [*Ceph*](http://docs.ceph.com/docs/master/): a distributed file system that supports rbd, the POSIX API (ceph fs), and an object storage API; see [here](https://kubernetes.io/docs/user-guide/volumes/#rbd).
- [*MooseFS*](https://moosefs.com/documentation.html): a distributed storage system. It must first be mounted on the server node and then mounted into containers through a Kubernetes hostPath volume.
## Configuring kubectl
### Install kubectl
```
# OS X
curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/darwin/amd64/kubectl
# Linux
curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl
# Windows
curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/windows/amd64/kubectl.exe
```
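After downloading, the binary still needs to be made executable and placed on your `PATH` before use; a minimal sketch for OS X/Linux:
```bash
chmod +x ./kubectl
sudo mv ./kubectl /usr/local/bin/kubectl
# print the client version to confirm the install
kubectl version --client
```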
### Configure kubectl to access your Kubernetes cluster
Edit the configuration file `~/.kube/config` and change the `Master-IP` address. If you use SSL authentication, you also need to configure `certificate-authority` and the user certificates under `users`; if you access the cluster without SSL (for example via port 8080), you can remove these certificate settings.
```
apiVersion: v1
clusters:
- cluster:
certificate-authority: /path/to/ca.crt
server: https://[Master-IP]:443
name: minikube
contexts:
- context:
cluster: minikube
user: minikube
name: minikube
current-context: minikube
kind: Config
preferences: {}
users:
- name: minikube
user:
client-certificate: /path/to/apiserver.crt
client-key: /Users/wuyi/.minikube/apiserver.key
```
@@ -18,11 +18,11 @@ PaddlePaddle provides the toolchain configuration document [cmake/cross_compiling/ios.cmake] for cross-compilation
- `CMAKE_SYSTEM_NAME`, the target platform of the CMake build; it must be set to `iOS`. Once `CMAKE_SYSTEM_NAME=iOS` is set, PaddlePaddle's CMake system automatically compiles all third-party dependencies and forces the values of several PaddlePaddle options (`WITH_C_API=ON`, `WITH_GPU=OFF`, `WITH_AVX=OFF`, `WITH_PYTHON=OFF`, `WITH_RDMA=OFF`).
- `WITH_C_API`, whether to build the C-API inference library; it must be set to ON. On iOS, inference is only supported through the C-API.
- `WITH_SWIG_PY`, it must be set to `OFF`. Training or inference via SWIG is not supported on iOS.
Optional configuration parameters for the iOS platform:
- `IOS_PLATFORM`, can be set to `OS` (the default) or `SIMULATOR`.
- `OS`, the build targets `arm`-architecture physical devices such as iPhone and iPad.
- `SIMULATOR`, the build targets the `x86`-architecture simulator platform.
- `IOS_ARCH`, the target architecture. The valid target architectures for each `IOS_PLATFORM` are listed in the table below; by default all architectures are compiled:
...
# PaddlePaddle Compiling Guide for iOS
This tutorial will walk you through cross-compiling the PaddlePaddle library for iOS from source on macOS.
## Preparation
Apple provides Xcode for cross-compiling and as the IDE for iOS development. Download it from the App Store or [here](https://developer.apple.com/cn/xcode/). To verify your installation, run the following command:
```bash
$ xcodebuild -version
Xcode 9.0
Build version 9A235
```
## Cross-compiling configurations
PaddlePaddle provides the cross-compiling toolchain configuration document [cmake/cross_compiling/ios.cmake](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/ios.cmake), which has default settings for frequently used compilers.
There are some mandatory variables that need to be set before cross-compiling PaddlePaddle for iOS:
- `CMAKE_SYSTEM_NAME`, the CMake target platform name; it has to be `iOS`. When this variable is set to `iOS`, PaddlePaddle's CMake system will compile all the third-party dependencies and enforce some parameters (`WITH_C_API=ON`, `WITH_GPU=OFF`, `WITH_AVX=OFF`, `WITH_PYTHON=OFF`, `WITH_RDMA=OFF`).
- `WITH_C_API`, whether to compile the C-API inference library; it has to be `ON`, since the C-API is the only supported inference interface on iOS.
- `WITH_SWIG_PY`, has to be `OFF`. Inference or training via SWIG is not supported on iOS.
Optional environment variables for iOS are:
- `IOS_PLATFORM`, either `OS` (default) or `SIMULATOR`.
- `OS`, build targets ARM-based physical devices like iPhone or iPad.
- `SIMULATOR`, build targets x86 architecture simulators.
- `IOS_ARCH`, the target architecture. By default, all architecture types are compiled. If you need to specify a single architecture, find the valid values for each `IOS_PLATFORM` setting in the table below:
<table class="docutils">
<colgroup>
<col width="35%" />
<col width="65%" />
</colgroup>
<thead valign="bottom">
<tr class="row-odd">
<th class="head">IOS_PLATFORM</th>
<th class="head">IOS_ARCH</th>
</tr>
</thead>
<tbody valign="top">
<tr class="row-even">
<td>OS</td>
<td>armv7, armv7s, arm64 </td>
</tr>
<tr class="row-odd">
<td>SIMULATOR</td>
<td>i386, x86_64 </td>
</tr>
</tbody>
</table>
- `IOS_DEPLOYMENT_TARGET`, the minimum iOS version for deployment; `7.0` by default.
- `IOS_ENABLE_BITCODE`, whether to enable [Bitcode](https://developer.apple.com/library/content/documentation/IDEs/Conceptual/AppDistributionGuide/AppThinning/AppThinning.html#//apple_ref/doc/uid/TP40012582-CH35-SW3). Values can be `ON/OFF`; `ON` by default.
- `IOS_USE_VECLIB_FOR_BLAS`, whether to use the [vecLib](https://developer.apple.com/documentation/accelerate/veclib) framework for BLAS computation. Values can be `ON/OFF`; `OFF` by default.
- `IOS_DEVELOPMENT_ROOT`, the path to the `Developer` directory; it can be explicitly set to your `/path/to/platform/Developer`. If left blank, PaddlePaddle will automatically pick the `Developer` directory of the Xcode platform corresponding to your `IOS_PLATFORM` value.
- `IOS_SDK_ROOT`, the path to the SDK root; it can be explicitly set to your `/path/to/platform/Developer/SDKs/SDK`. If left blank, PaddlePaddle will pick the latest SDK under `IOS_DEVELOPMENT_ROOT`.
Other settings:
- `USE_EIGEN_FOR_BLAS`, whether to use Eigen for matrix computation; effective only when `IOS_USE_VECLIB_FOR_BLAS=OFF`. Values can be `ON/OFF`; `OFF` by default.
- `HOST_C/CXX_COMPILER`, the host C/C++ compiler. Defaults to the value of the environment variable `CC`/`CXX`, or `cc`/`c++` if `CC`/`CXX` is not set.
Some typical CMake configurations:
```bash
cmake -DCMAKE_SYSTEM_NAME=iOS \
-DIOS_PLATFORM=OS \
-DIOS_ARCH="armv7;arm64" \
-DIOS_ENABLE_BITCODE=ON \
-DIOS_USE_VECLIB_FOR_BLAS=ON \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \
-DWITH_C_API=ON \
-DWITH_TESTING=OFF \
-DWITH_SWIG_PY=OFF \
..
```
```bash
cmake -DCMAKE_SYSTEM_NAME=iOS \
-DIOS_PLATFORM=SIMULATOR \
-DIOS_ARCH="x86_64" \
-DIOS_USE_VECLIB_FOR_BLAS=ON \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \
-DWITH_C_API=ON \
-DWITH_TESTING=OFF \
-DWITH_SWIG_PY=OFF \
..
```
You can set other compiling parameters for your own needs. For example, if you want to minimize the library size, set `CMAKE_BUILD_TYPE` to `MinSizeRel`; if performance is your concern, set `CMAKE_BUILD_TYPE` to `Release`. You can even tune the PaddlePaddle compilation by manually setting `CMAKE_C/CXX_FLAGS`.
**Tips for better performance**:
- set `CMAKE_BUILD_TYPE` to `Release`
- set `IOS_USE_VECLIB_FOR_BLAS` to `ON`
## Compile and install
After configuring with CMake, run the following commands; PaddlePaddle will download and compile the third-party dependencies, then compile and install the PaddlePaddle inference library.
```
$ make
$ make install
```
Please note: if you have compiled PaddlePaddle for other platforms in the same source directory, remove the `third_party` and `build` directories from the source tree with `rm -rf` first, to ensure that all third-party dependencies and PaddlePaddle itself are freshly compiled with the current CMake configuration.
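A sketch of that cleanup, run from the PaddlePaddle source root:
```bash
# wipe previously built dependencies and build output before re-running cmake
rm -rf third_party/ build/
```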
After `make` and `make install`, the `your/path/to/install` directory will contain:
- `include`, contains all the C-API header files.
- `lib`, contains the PaddlePaddle C-API static library.
- `third_party`, contains all the third-party libraries.
Please note: if the PaddlePaddle library needs to support both physical devices and simulators, you will need to compile for each target separately and then merge the resulting static libraries into one fat library with `lipo`, as sketched below.
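A minimal sketch of the merge; the install paths and the `libpaddle_capi_whole.a` file name are illustrative assumptions, so check your own install directories for the actual names:
```bash
# merge the device and simulator builds into a single fat library
lipo -create ios_os/lib/libpaddle_capi_whole.a \
             ios_sim/lib/libpaddle_capi_whole.a \
     -output libpaddle_capi_whole.a
# confirm which architectures the result contains
lipo -info libpaddle_capi_whole.a
```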
Now you have the PaddlePaddle library compiled and installed; the fat library can be used in deep-learning iOS apps. Please refer to the C-API documentation for usage guides.
@@ -5,4 +5,5 @@ MOBILE
   :maxdepth: 1
   cross_compiling_for_android_en.md
   cross_compiling_for_ios_en.md
   cross_compiling_for_raspberry_en.md
@@ -58,3 +58,6 @@ cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry
    proto_desc)
cc_library(selected_rows SRCS selected_rows.cc DEPS tensor)
cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows)
cc_library(init SRCS init.cc DEPS gflags executor place stringpiece)
cc_test(init_test SRCS init_test.cc DEPS init)
@@ -19,42 +19,42 @@ limitations under the License. */
namespace paddle {
namespace framework {

Attribute GetAttrValue(const proto::OpDesc::Attr& attr_desc) {
  switch (attr_desc.type()) {
    case proto::AttrType::BOOLEAN: {
      return attr_desc.b();
    }
    case proto::AttrType::INT: {
      return attr_desc.i();
    }
    case proto::AttrType::FLOAT: {
      return attr_desc.f();
    }
    case proto::AttrType::STRING: {
      return attr_desc.s();
    }
    case proto::AttrType::BOOLEANS: {
      std::vector<bool> val(attr_desc.bools_size());
      for (int i = 0; i < attr_desc.bools_size(); ++i) {
        val[i] = attr_desc.bools(i);
      }
      return val;
    }
    case proto::AttrType::INTS: {
      std::vector<int> val(attr_desc.ints_size());
      for (int i = 0; i < attr_desc.ints_size(); ++i) {
        val[i] = attr_desc.ints(i);
      }
      return val;
    }
    case proto::AttrType::FLOATS: {
      std::vector<float> val(attr_desc.floats_size());
      for (int i = 0; i < attr_desc.floats_size(); ++i) {
        val[i] = attr_desc.floats(i);
      }
      return val;
    }
    case proto::AttrType::STRINGS: {
      std::vector<std::string> val(attr_desc.strings_size());
      for (int i = 0; i < attr_desc.strings_size(); ++i) {
        val[i] = attr_desc.strings(i);
...
@@ -27,12 +27,12 @@ limitations under the License. */
namespace paddle {
namespace framework {

template <typename T>
inline proto::AttrType AttrTypeID() {
  Attribute tmp = T();
  return static_cast<proto::AttrType>(tmp.which() - 1);
}

Attribute GetAttrValue(const proto::OpDesc::Attr& attr_desc);

class AttrReader {
 public:
...
@@ -341,7 +341,7 @@ static void CreateGradVarInBlock(
  auto* param = block_desc->FindVarRecursive(pname);
  auto* grad = block_desc->FindVar(arg);
  if (param == nullptr) {
    grad->SetDataType(proto::DataType::FP32);
  } else {
    grad->SetDataType(param->GetDataType());
  }
...
@@ -166,7 +166,7 @@ class FillZeroOpMaker : public OpProtoAndCheckerMaker {
class SumOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SumOpMaker(OpProto *proto, OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "the input tensors of sum operator.").AsDuplicable();
    AddOutput("Out", "the output tensor of sum operator.");
...
@@ -128,22 +128,22 @@ BlockDescBind *BlockDescBind::ParentBlock() const {
  return prog_->MutableBlock(static_cast<size_t>(this->desc_->parent_idx()));
}

proto::BlockDesc *BlockDescBind::Proto() {
  Flush();
  return desc_;
}

BlockDescBind::BlockDescBind(ProgramDescBind *prog, proto::BlockDesc *desc)
    : prog_(prog), desc_(desc), need_update_(false) {
  for (const proto::VarDesc &var_desc : desc_->vars()) {
    vars_[var_desc.name()].reset(new VarDescBind(var_desc));
  }
  for (const proto::OpDesc &op_desc : desc_->ops()) {
    ops_.emplace_back(new OpDescBind(op_desc, prog));
  }
}

BlockDescBind::BlockDescBind(const BlockDescBind &other, proto::BlockDesc *desc,
                             ProgramDescBind *prog)
    : prog_(prog), desc_(desc) {
  need_update_ = true;
...
@@ -36,9 +36,9 @@ class ProgramDescBind;
class BlockDescBind {
 public:
  BlockDescBind(ProgramDescBind *prog, proto::BlockDesc *desc);

  BlockDescBind(const BlockDescBind &other, proto::BlockDesc *desc,
                ProgramDescBind *prog);

  ~BlockDescBind() {
@@ -88,7 +88,7 @@ class BlockDescBind {
  void Flush();

  proto::BlockDesc *Proto();

  ProgramDescBind *Program() { return this->prog_; }
@@ -97,8 +97,8 @@ class BlockDescBind {
  void ClearPBVars();

 private:
  ProgramDescBind *prog_;   // not_own
  proto::BlockDesc *desc_;  // not_own
  bool need_update_;

  std::deque<std::unique_ptr<OpDescBind>> ops_;
...
@@ -20,7 +20,8 @@
namespace paddle {
namespace framework {

inline proto::DataType ToDataType(std::type_index type) {
  using namespace paddle::framework::proto;
  if (typeid(float).hash_code() == type.hash_code()) {
    return DataType::FP32;
  } else if (typeid(double).hash_code() == type.hash_code()) {
@@ -36,7 +37,8 @@ inline DataType ToDataType(std::type_index type) {
  }
}

inline std::type_index ToTypeIndex(proto::DataType type) {
  using namespace paddle::framework::proto;
  switch (type) {
    case DataType::FP32:
      return typeid(float);
@@ -54,7 +56,8 @@ inline std::type_index ToTypeIndex(DataType type) {
}

template <typename Visitor>
inline void VisitDataType(proto::DataType type, Visitor visitor) {
  using namespace paddle::framework::proto;
  switch (type) {
    case DataType::FP32:
      visitor.template operator()<float>();
...
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <sstream>
#include <vector>
...
@@ -90,7 +90,7 @@ struct OpInfoFiller<T, kOperator> {
template <typename T>
struct OpInfoFiller<T, kOpProtoAndCheckerMaker> {
  void operator()(const char* op_type, OpInfo* info) const {
    info->proto_ = new proto::OpProto;
    info->checker_ = new OpAttrChecker();
    auto maker = T(info->proto_, info->checker_);
    maker.Validate();
...
@@ -33,48 +33,28 @@ namespace framework {
const std::string kFeedOpType = "feed";
const std::string kFetchOpType = "fetch";

DeviceContextPool* DeviceContextPool::pool = nullptr;

Executor::Executor(const std::vector<platform::Place>& places) {
  DeviceContextPool& pool = DeviceContextPool::Get();
  auto borrowed_contexts = pool.Borrow(places);
  device_contexts_.swap(borrowed_contexts);
}

static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) {
  if (var_type == proto::VarDesc::LOD_TENSOR) {
    var->GetMutable<LoDTensor>();
  } else if (var_type == proto::VarDesc::SELECTED_ROWS) {
    var->GetMutable<SelectedRows>();
  } else if (var_type == proto::VarDesc::FEED_MINIBATCH) {
    var->GetMutable<FeedFetchList>();
  } else if (var_type == proto::VarDesc::FETCH_LIST) {
    var->GetMutable<FeedFetchList>();
  } else if (var_type == proto::VarDesc::STEP_SCOPES) {
    var->GetMutable<std::vector<framework::Scope>>();
  } else if (var_type == proto::VarDesc::LOD_RANK_TABLE) {
    var->GetMutable<LoDRankTable>();
  } else if (var_type == proto::VarDesc::LOD_TENSOR_ARRAY) {
    var->GetMutable<LoDTensorArray>();
  } else {
    PADDLE_THROW(
@@ -132,8 +112,5 @@ void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id,
  }
}

}  // namespace framework
}  // namespace paddle
@@ -14,19 +14,98 @@ limitations under the License. */
#pragma once

#include <map>
#include <unordered_map>

#include "paddle/framework/op_info.h"
#include "paddle/framework/program_desc.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"

namespace paddle {
namespace framework {
class DeviceContextPool {
public:
static DeviceContextPool& Get() {
PADDLE_ENFORCE_NOT_NULL(pool, "Need to Create DeviceContextPool first!");
return *pool;
}
static DeviceContextPool& Create(const std::vector<platform::Place>& places) {
if (pool == nullptr) {
pool = new DeviceContextPool(places);
}
return *pool;
}
std::vector<const platform::DeviceContext*> Borrow(
const std::vector<platform::Place>& places) {
PADDLE_ENFORCE_GT(places.size(), 0);
PADDLE_ENFORCE_LE(places.size(), device_contexts_.size());
std::vector<const platform::DeviceContext*> borrowed_contexts;
for (auto& place : places) {
auto range = device_contexts_.equal_range(place);
if (range.first == range.second) {
PADDLE_THROW(
"'Place' is not supported, Please re-compile with WITH_GPU "
"option");
}
// TODO(dzhwinter) : assign the first found device. Will enhanced later.
// device load balancer maybe useful here.
borrowed_contexts.emplace_back(range.first->second);
}
return borrowed_contexts;
}
explicit DeviceContextPool(const std::vector<platform::Place>& places) {
PADDLE_ENFORCE_GT(places.size(), 0);
for (size_t i = 0; i < places.size(); i++) {
if (platform::is_cpu_place(places[i])) {
device_contexts_.emplace(
places[i], new platform::CPUDeviceContext(
boost::get<platform::CPUPlace>(places[i])));
} else if (platform::is_gpu_place(places[i])) {
#ifdef PADDLE_WITH_CUDA
device_contexts_.emplace(
places[i], new platform::CUDADeviceContext(
boost::get<platform::GPUPlace>(places[i])));
#else
PADDLE_THROW(
"'GPUPlace' is not supported, Please re-compile with WITH_GPU "
"option");
#endif
}
}
}
~DeviceContextPool() {}
private:
static DeviceContextPool* pool;
struct Hash {
std::hash<int> hash_;
size_t operator()(const platform::Place& place) const {
return hash_(place.which());
}
};
std::unordered_multimap<const platform::Place, const platform::DeviceContext*,
Hash>
device_contexts_;
DISABLE_COPY_AND_ASSIGN(DeviceContextPool);
};
class Executor {
 public:
  // TODO(dzhwinter) : Do not rely on this function, it will be removed
  explicit Executor(const platform::DeviceContext& device)
      : Executor(std::vector<platform::Place>({device.GetPlace()})) {}

  explicit Executor(const platform::Place& place)
      : Executor(std::vector<platform::Place>({place})) {}

  explicit Executor(const std::vector<platform::Place>& places);

  /* @Brief
   * Runtime evaluation of the given ProgramDesc under certain Scope
@@ -39,7 +118,6 @@ class Executor {
 private:
  std::vector<const platform::DeviceContext*> device_contexts_;
};

}  // namespace framework
...
@@ -14,7 +14,7 @@ limitations under the License. */
syntax = "proto2";
option optimize_for = LITE_RUNTIME;
package paddle.framework.proto;

enum AttrType {
  INT = 0;
...
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include <string>
#include "paddle/framework/executor.h"
#include "paddle/framework/init.h"
#include "paddle/platform/place.h"
#include "paddle/string/piece.h"
namespace paddle {
namespace framework {
std::once_flag gflags_init_flag;
// TODO(qijun) move init gflags to init.cc
void InitGflags(std::vector<std::string> &argv) {
std::call_once(gflags_init_flag, [&]() {
int argc = argv.size();
char **arr = new char *[argv.size()];
std::string line;
for (size_t i = 0; i < argv.size(); i++) {
arr[i] = &argv[i][0];
line += argv[i];
line += ' ';
}
google::ParseCommandLineFlags(&argc, &arr, true);
VLOG(1) << "Init commandline: " << line;
});
}
bool InitDevices(const std::vector<std::string> &devices) {
// device format
// CPU
// GPU:1
// TODO(dzhwinter) : add device format annotation for users.
std::vector<platform::Place> places;
for (auto &device : devices) {
auto p = string::Piece(device);
if (string::Find(p, ':', 0) == string::Piece::npos) {
places.emplace_back(platform::CPUPlace());
} else if (string::HasPrefix(p, "GPU")) {
#ifdef PADDLE_WITH_CUDA
auto pos = string::RFind(p, ':', string::Piece::npos);
auto number = device.substr(pos + 1);
places.emplace_back(platform::GPUPlace(std::stoi(number)));
#else
LOG(WARNING)
<< "'GPU' is not supported, Please re-compile with WITH_GPU option";
#endif
} else {
return false;
}
}
if (std::find_if(places.begin(), places.end(),
[&](const platform::Place &place) {
return platform::is_cpu_place(place);
}) == places.end()) {
places.emplace_back(platform::CPUPlace());
LOG(WARNING) << "Not specified any device, use CPU by Default.";
}
DeviceContextPool::Create(places);
  return true;
}
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <mutex>
#include <string>
#include <vector>
#include "gflags/gflags.h"
#include "glog/logging.h"
namespace paddle {
namespace framework {
void InitGflags(std::vector<std::string> &argv);
bool InitDevices(const std::vector<std::string> &devices);
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "gtest/gtest.h"
#include "paddle/framework/init.h"
TEST(Init, InitDevices) {
using paddle::framework::InitDevices;
std::vector<std::string> ds1 = {"CPU"};
ASSERT_EQ(InitDevices(ds1), true);
#ifdef PADDLE_WITH_CUDA
std::vector<std::string> ds2 = {"CPU", "GPU:0", "GPU:1"};
ASSERT_EQ(InitDevices(ds2), true);
#endif
}
@@ -46,4 +46,13 @@ void LoDRankTable::Reset(const LoD& lod, size_t level) {
}
}  // namespace framework

std::ostream& operator<<(std::ostream& out,
                         const framework::LoDRankTable& table) {
  out << "NumOfSequence " << table.items().size() << "\n";
  for (auto& each_item : table.items()) {
    out << "\tSeq #" << each_item.index << ", Len=" << each_item.length << "\n";
  }
  return out;
}

}  // namespace paddle
@@ -13,6 +13,7 @@
limitations under the License. */
#pragma once
#include <iosfwd>
#include "paddle/framework/lod_tensor.h"

namespace paddle {
@@ -52,4 +53,8 @@ class LoDRankTable {
};
}  // namespace framework

std::ostream& operator<<(std::ostream& out,
                         const framework::LoDRankTable& table);

}  // namespace paddle
@@ -197,7 +197,7 @@ void SerializeToStream(std::ostream &os, const LoDTensor &tensor,
  {  // the 2nd field, tensor description
     // int32_t size
     // void* protobuf message
    proto::TensorDesc desc;
    desc.set_data_type(framework::ToDataType(tensor.type()));
    auto dims = framework::vectorize(tensor.dims());
    auto *pb_dims = desc.mutable_dims();
@@ -262,7 +262,7 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor) {
  uint32_t version;
  is.read(reinterpret_cast<char *>(&version), sizeof(version));
  PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
  proto::TensorDesc desc;
  {  // int32_t size
     // proto buffer
    int32_t size;
@@ -281,16 +281,16 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor) {
  void *buf;
  platform::Place cpu = platform::CPUPlace();
  switch (desc.data_type()) {
    case proto::FP32:
      buf = tensor->mutable_data<float>(cpu);
      break;
    case proto::FP64:
      buf = tensor->mutable_data<double>(cpu);
      break;
    case proto::INT32:
      buf = tensor->mutable_data<int>(cpu);
      break;
    case proto::INT64:
      buf = tensor->mutable_data<int64_t>(cpu);
      break;
    default:
...
@@ -184,6 +184,18 @@ LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level,
  return tensor;
}

// Get the absolute offset of lod[start_level][start_idx:end_idx] and the
// relative lengths of the details for every level (i.e., [start_level:]).
//
// For example,
//   lod = [[0, 3, 4, 8], [0, 9, 10, 11, 13, 17, 19, 22, 24]]
//   start_level = 0
//   start_idx = 1
//   end_idx = 3
//
// Returns:
//  LoD = [[1, 4], [2, 4, 2, 3, 2]]
//  pair<size_t, size_t> = {11, 24}
std::pair<LoD, std::pair<size_t, size_t>> GetSubLoDAndAbsoluteOffset(
    const LoD& lod, size_t start_idx, size_t end_idx, size_t start_level);
...
@@ -58,11 +58,11 @@ class CompileTimeInferShapeContext : public InferShapeContext {
    PADDLE_ENFORCE_LT(j, Outputs(out).size());
    auto *in_var = block_.FindVarRecursive(Inputs(in)[i]);
    auto *out_var = block_.FindVarRecursive(Outputs(out)[j]);
    if (in_var->GetType() != proto::VarDesc::LOD_TENSOR) {
      VLOG(3) << "input " << in << " is not LodTensor";
      return;
    }
    PADDLE_ENFORCE_EQ(in_var->GetType(), proto::VarDesc::LOD_TENSOR,
                      "The %d-th output of Output(%s) must be LoDTensor.", j,
                      out);
    out_var->SetLoDLevel(in_var->GetLodLevel());
@@ -70,7 +70,7 @@ class CompileTimeInferShapeContext : public InferShapeContext {
  bool IsRuntime() const override;

 protected:
  proto::VarDesc::VarType GetVarType(const std::string &name) const override;

  DDim GetDim(const std::string &name) const override;
@@ -90,12 +90,12 @@ OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs,
  need_update_ = true;
}

OpDescBind::OpDescBind(const proto::OpDesc &desc, ProgramDescBind *prog)
    : desc_(desc), need_update_(false) {
  // restore inputs_
  int input_size = desc_.inputs_size();
  for (int i = 0; i < input_size; ++i) {
    const proto::OpDesc::Var &var = desc_.inputs(i);
    std::vector<std::string> &args = inputs_[var.parameter()];
    int argu_size = var.arguments_size();
    args.reserve(argu_size);
@@ -106,7 +106,7 @@ OpDescBind::OpDescBind(const OpDesc &desc, ProgramDescBind *prog)
  // restore outputs_
  int output_size = desc_.outputs_size();
  for (int i = 0; i < output_size; ++i) {
    const proto::OpDesc::Var &var = desc_.outputs(i);
    std::vector<std::string> &args = outputs_[var.parameter()];
    int argu_size = var.arguments_size();
    args.reserve(argu_size);
@@ -115,9 +115,9 @@ OpDescBind::OpDescBind(const OpDesc &desc, ProgramDescBind *prog)
    }
  }
  // restore attrs_
  for (const proto::OpDesc::Attr &attr : desc_.attrs()) {
    std::string attr_name = attr.name();
    if (attr.type() != proto::AttrType::BLOCK) {
      attrs_[attr_name] = GetAttrValue(attr);
    } else {
      auto bid = attr.block_idx();
@@ -126,7 +126,7 @@ OpDescBind::OpDescBind(const OpDesc &desc, ProgramDescBind *prog)
  }
}

proto::OpDesc *OpDescBind::Proto() {
  Flush();
  return &desc_;
}
@@ -175,10 +175,10 @@ void OpDescBind::SetOutput(const std::string &param_name,
  this->outputs_[param_name] = args;
}

proto::AttrType OpDescBind::GetAttrType(const std::string &name) const {
  auto it = attrs_.find(name);
  PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name);
  return static_cast<proto::AttrType>(it->second.which() - 1);
}

std::vector<std::string> OpDescBind::AttrNames() const {
@@ -253,8 +253,8 @@ void OpDescBind::RenameInput(const std::string &old_name,
}

struct SetAttrDescVisitor : public boost::static_visitor<void> {
  explicit SetAttrDescVisitor(proto::OpDesc::Attr *attr) : attr_(attr) {}
  mutable proto::OpDesc::Attr *attr_;
  void operator()(int v) const { attr_->set_i(v); }
  void operator()(float v) const { attr_->set_f(v); }
  void operator()(const std::string &v) const { attr_->set_s(v); }
@@ -272,7 +272,9 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> {
  void operator()(const std::vector<bool> &v) const {
    VectorToRepeated(v, attr_->mutable_bools());
  }
  void operator()(proto::BlockDesc *desc) const {
    attr_->set_block_idx(desc->idx());
  }
  void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); }
};
@@ -297,7 +299,7 @@ void OpDescBind::Flush() {
      auto *attr_desc = desc_.add_attrs();
      attr_desc->set_name(attr.first);
      attr_desc->set_type(
          static_cast<proto::AttrType>(attr.second.which() - 1));
      SetAttrDescVisitor visitor(attr_desc);
      boost::apply_visitor(visitor, attr.second);
    }
@@ -375,7 +377,7 @@ void OpDescBind::InferVarType(BlockDescBind *block) const {
  for (auto &out_pair : this->outputs_) {
    for (auto &out_var_name : out_pair.second) {
      block->FindRecursiveOrCreateVar(out_var_name)
          ->SetType(proto::VarDesc::LOD_TENSOR);
    }
  }
}
@@ -484,7 +486,7 @@ void CompileTimeInferShapeContext::SetDim(const std::string &name,
}

bool CompileTimeInferShapeContext::IsRuntime() const { return false; }

proto::VarDesc::VarType CompileTimeInferShapeContext::GetVarType(
    const std::string &name) const {
  return block_.FindVarRecursive(name)->GetType();
} }
......
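
The `GetAttrType` implementation above relies on a subtle contract: the attribute's enum value is recovered from the position of the active alternative in the `boost::variant`, minus one for the leading `boost::blank`. A standalone sketch of that contract, using `std::variant` and mock types in place of the Paddle/protobuf ones (all names here are illustrative):

```cpp
#include <cassert>
#include <string>
#include <variant>
#include <vector>

// Mirrors the ordering of the AttrType enum (illustrative subset).
enum class AttrType { INT = 0, FLOAT = 1, STRING = 2, INTS = 3 };

// The alternatives must be declared in the same order as the enum, with a
// monostate playing the role of boost::blank at position 0; that is why
// the conversion below subtracts one.
using Attribute =
    std::variant<std::monostate, int, float, std::string, std::vector<int>>;

AttrType GetAttrType(const Attribute& attr) {
  // index() (which() in boost) is the position of the active alternative.
  return static_cast<AttrType>(attr.index() - 1);
}

int main() {
  assert(GetAttrType(Attribute{3}) == AttrType::INT);
  assert(GetAttrType(Attribute{2.5f}) == AttrType::FLOAT);
  assert(GetAttrType(Attribute{std::string("relu")}) == AttrType::STRING);
  return 0;
}
```

The cast only stays correct while the variant's alternatives remain in exactly the enum's order; the real code depends on the same ordering, which is why it survives the namespace move unchanged.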
...@@ -33,9 +33,9 @@ class OpDescBind { ...@@ -33,9 +33,9 @@ class OpDescBind {
OpDescBind(const std::string &type, const VariableNameMap &inputs, OpDescBind(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs); const VariableNameMap &outputs, const AttributeMap &attrs);
OpDescBind(const OpDesc &desc, ProgramDescBind *prog); OpDescBind(const proto::OpDesc &desc, ProgramDescBind *prog);
OpDesc *Proto(); proto::OpDesc *Proto();
std::string Type() const { return desc_.type(); } std::string Type() const { return desc_.type(); }
...@@ -59,7 +59,7 @@ class OpDescBind { ...@@ -59,7 +59,7 @@ class OpDescBind {
return attrs_.find(name) != attrs_.end(); return attrs_.find(name) != attrs_.end();
} }
AttrType GetAttrType(const std::string &name) const; proto::AttrType GetAttrType(const std::string &name) const;
std::vector<std::string> AttrNames() const; std::vector<std::string> AttrNames() const;
...@@ -126,7 +126,7 @@ class OpDescBind { ...@@ -126,7 +126,7 @@ class OpDescBind {
return ret_val; return ret_val;
} }
OpDesc desc_; proto::OpDesc desc_;
VariableNameMap inputs_; VariableNameMap inputs_;
VariableNameMap outputs_; VariableNameMap outputs_;
AttributeMap attrs_; AttributeMap attrs_;
......
...@@ -34,7 +34,7 @@ class InferShapeBase { ...@@ -34,7 +34,7 @@ class InferShapeBase {
struct OpInfo { struct OpInfo {
OpCreator creator_; OpCreator creator_;
GradOpMakerFN grad_op_maker_; GradOpMakerFN grad_op_maker_;
OpProto* proto_{nullptr}; proto::OpProto* proto_{nullptr};
OpAttrChecker* checker_{nullptr}; OpAttrChecker* checker_{nullptr};
InferVarTypeFN infer_var_type_; InferVarTypeFN infer_var_type_;
InferShapeFN infer_shape_; InferShapeFN infer_shape_;
...@@ -43,7 +43,7 @@ struct OpInfo { ...@@ -43,7 +43,7 @@ struct OpInfo {
return proto_ != nullptr && checker_ != nullptr; return proto_ != nullptr && checker_ != nullptr;
} }
const OpProto& Proto() const { const proto::OpProto& Proto() const {
PADDLE_ENFORCE_NOT_NULL(proto_, "Operator Proto has not been registered"); PADDLE_ENFORCE_NOT_NULL(proto_, "Operator Proto has not been registered");
PADDLE_ENFORCE(proto_->IsInitialized(), PADDLE_ENFORCE(proto_->IsInitialized(),
"Operator Proto must be initialized in op info"); "Operator Proto must be initialized in op info");
......
...@@ -22,6 +22,8 @@ namespace framework { ...@@ -22,6 +22,8 @@ namespace framework {
// this class not only makes the proto but also initializes attribute checkers. // this class not only makes the proto but also initializes attribute checkers.
class OpProtoAndCheckerMaker { class OpProtoAndCheckerMaker {
public: public:
using OpProto = proto::OpProto;
using OpAttrChecker = framework::OpAttrChecker;
OpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) OpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
: proto_(proto), op_checker_(op_checker) {} : proto_(proto), op_checker_(op_checker) {}
...@@ -80,7 +82,7 @@ class OpProtoAndCheckerMaker { ...@@ -80,7 +82,7 @@ class OpProtoAndCheckerMaker {
class NOPMaker : public OpProtoAndCheckerMaker { class NOPMaker : public OpProtoAndCheckerMaker {
public: public:
NOPMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) NOPMaker(OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {} : OpProtoAndCheckerMaker(proto, op_checker) {}
}; };
......
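
The two `using` declarations added to `OpProtoAndCheckerMaker` are what let existing makers such as `NOPMaker` keep spelling the short `OpProto` name after the generated classes moved into the `proto` namespace. A compilable sketch of the pattern, with mock stand-ins for the real types:

```cpp
#include <string>

namespace proto {
struct OpProto { std::string comment; };  // stand-in for the generated class
}  // namespace proto
struct OpAttrChecker {};

class OpProtoAndCheckerMaker {
 public:
  // The alias re-exports the short name, so derived makers need no edits
  // when the underlying type moves into the proto namespace.
  using OpProto = proto::OpProto;
  OpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
      : proto_(proto), op_checker_(op_checker) {}

 protected:
  OpProto* proto_;
  OpAttrChecker* op_checker_;
};

// Derived makers still write the unqualified OpProto, found via the base.
class MyOpMaker : public OpProtoAndCheckerMaker {
 public:
  MyOpMaker(OpProto* proto, OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    proto->comment = "my_op does something";
  }
};

int main() {
  proto::OpProto p;
  OpAttrChecker c;
  MyOpMaker maker(&p, &c);
  return 0;
}
```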
...@@ -18,7 +18,7 @@ limitations under the License. */ ...@@ -18,7 +18,7 @@ limitations under the License. */
class TestAttrProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { class TestAttrProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
public: public:
TestAttrProtoMaker(paddle::framework::OpProto* proto, TestAttrProtoMaker(paddle::framework::proto::OpProto* proto,
paddle::framework::OpAttrChecker* op_checker) paddle::framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddAttr<float>("scale", "scale of test op"); AddAttr<float>("scale", "scale of test op");
...@@ -27,7 +27,7 @@ class TestAttrProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { ...@@ -27,7 +27,7 @@ class TestAttrProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
}; };
TEST(ProtoMaker, DuplicatedAttr) { TEST(ProtoMaker, DuplicatedAttr) {
paddle::framework::OpProto op_proto; paddle::framework::proto::OpProto op_proto;
paddle::framework::OpAttrChecker op_checker; paddle::framework::OpAttrChecker op_checker;
auto proto_maker = TestAttrProtoMaker(&op_proto, &op_checker); auto proto_maker = TestAttrProtoMaker(&op_proto, &op_checker);
ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet); ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet);
...@@ -35,7 +35,7 @@ TEST(ProtoMaker, DuplicatedAttr) { ...@@ -35,7 +35,7 @@ TEST(ProtoMaker, DuplicatedAttr) {
class TestInOutProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { class TestInOutProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
public: public:
TestInOutProtoMaker(paddle::framework::OpProto* proto, TestInOutProtoMaker(paddle::framework::proto::OpProto* proto,
paddle::framework::OpAttrChecker* op_checker) paddle::framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("input", "input of test op"); AddInput("input", "input of test op");
...@@ -44,7 +44,7 @@ class TestInOutProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { ...@@ -44,7 +44,7 @@ class TestInOutProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
}; };
TEST(ProtoMaker, DuplicatedInOut) { TEST(ProtoMaker, DuplicatedInOut) {
paddle::framework::OpProto op_proto; paddle::framework::proto::OpProto op_proto;
paddle::framework::OpAttrChecker op_checker; paddle::framework::OpAttrChecker op_checker;
auto proto_maker = TestInOutProtoMaker(&op_proto, &op_checker); auto proto_maker = TestInOutProtoMaker(&op_proto, &op_checker);
ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet); ASSERT_THROW(proto_maker.Validate(), paddle::platform::EnforceNotMet);
......
...@@ -31,7 +31,8 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp( ...@@ -31,7 +31,8 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
} }
static VariableNameMap ConvertOpDescVarsToVarNameMap( static VariableNameMap ConvertOpDescVarsToVarNameMap(
const google::protobuf::RepeatedPtrField<OpDesc::Var>& op_desc_vars) { const google::protobuf::RepeatedPtrField<proto::OpDesc::Var>&
op_desc_vars) {
VariableNameMap ret_val; VariableNameMap ret_val;
for (auto& var : op_desc_vars) { for (auto& var : op_desc_vars) {
auto& var_names = ret_val[var.parameter()]; auto& var_names = ret_val[var.parameter()];
...@@ -43,7 +44,8 @@ static VariableNameMap ConvertOpDescVarsToVarNameMap( ...@@ -43,7 +44,8 @@ static VariableNameMap ConvertOpDescVarsToVarNameMap(
return ret_val; return ret_val;
} }
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDesc& op_desc) { std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
const proto::OpDesc& op_desc) {
VLOG(1) << "CreateOp directly from OpDesc is deprecated. It should only be" VLOG(1) << "CreateOp directly from OpDesc is deprecated. It should only be"
"used in unit tests. Use CreateOp(const OpDescBind& op_desc) " "used in unit tests. Use CreateOp(const OpDescBind& op_desc) "
"instead."; "instead.";
......
...@@ -77,7 +77,7 @@ class OpRegistry { ...@@ -77,7 +77,7 @@ class OpRegistry {
const VariableNameMap& outputs, const VariableNameMap& outputs,
AttributeMap attrs); AttributeMap attrs);
static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc); static std::unique_ptr<OperatorBase> CreateOp(const proto::OpDesc& op_desc);
static std::unique_ptr<OperatorBase> CreateOp(const OpDescBind& op_desc); static std::unique_ptr<OperatorBase> CreateOp(const OpDescBind& op_desc);
}; };
......
...@@ -51,7 +51,7 @@ class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { ...@@ -51,7 +51,7 @@ class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
static void BuildVar(const std::string& param_name, static void BuildVar(const std::string& param_name,
std::initializer_list<const char*> arguments, std::initializer_list<const char*> arguments,
paddle::framework::OpDesc::Var* var) { paddle::framework::proto::OpDesc::Var* var) {
var->set_parameter(param_name); var->set_parameter(param_name);
for (auto& arg_name : arguments) { for (auto& arg_name : arguments) {
var->add_arguments(arg_name); var->add_arguments(arg_name);
...@@ -63,7 +63,7 @@ REGISTER_OP_WITHOUT_GRADIENT(my_test_op, paddle::framework::MyTestOp, ...@@ -63,7 +63,7 @@ REGISTER_OP_WITHOUT_GRADIENT(my_test_op, paddle::framework::MyTestOp,
paddle::framework::MyTestOpProtoAndCheckerMaker); paddle::framework::MyTestOpProtoAndCheckerMaker);
TEST(OpRegistry, CreateOp) { TEST(OpRegistry, CreateOp) {
paddle::framework::OpDesc op_desc; paddle::framework::proto::OpDesc op_desc;
op_desc.set_type("cos_sim"); op_desc.set_type("cos_sim");
BuildVar("input", {"aa"}, op_desc.add_inputs()); BuildVar("input", {"aa"}, op_desc.add_inputs());
BuildVar("output", {"bb"}, op_desc.add_outputs()); BuildVar("output", {"bb"}, op_desc.add_outputs());
...@@ -71,7 +71,7 @@ TEST(OpRegistry, CreateOp) { ...@@ -71,7 +71,7 @@ TEST(OpRegistry, CreateOp) {
float scale = 3.3; float scale = 3.3;
auto attr = op_desc.mutable_attrs()->Add(); auto attr = op_desc.mutable_attrs()->Add();
attr->set_name("scale"); attr->set_name("scale");
attr->set_type(paddle::framework::AttrType::FLOAT); attr->set_type(paddle::framework::proto::AttrType::FLOAT);
attr->set_f(scale); attr->set_f(scale);
auto op = paddle::framework::OpRegistry::CreateOp(op_desc); auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
...@@ -83,14 +83,14 @@ TEST(OpRegistry, CreateOp) { ...@@ -83,14 +83,14 @@ TEST(OpRegistry, CreateOp) {
} }
TEST(OpRegistry, IllegalAttr) { TEST(OpRegistry, IllegalAttr) {
paddle::framework::OpDesc op_desc; paddle::framework::proto::OpDesc op_desc;
op_desc.set_type("cos_sim"); op_desc.set_type("cos_sim");
BuildVar("input", {"aa"}, op_desc.add_inputs()); BuildVar("input", {"aa"}, op_desc.add_inputs());
BuildVar("output", {"bb"}, op_desc.add_outputs()); BuildVar("output", {"bb"}, op_desc.add_outputs());
auto attr = op_desc.mutable_attrs()->Add(); auto attr = op_desc.mutable_attrs()->Add();
attr->set_name("scale"); attr->set_name("scale");
attr->set_type(paddle::framework::AttrType::FLOAT); attr->set_type(paddle::framework::proto::AttrType::FLOAT);
attr->set_f(-2.0); attr->set_f(-2.0);
bool caught = false; bool caught = false;
...@@ -108,7 +108,7 @@ TEST(OpRegistry, IllegalAttr) { ...@@ -108,7 +108,7 @@ TEST(OpRegistry, IllegalAttr) {
} }
TEST(OpRegistry, DefaultValue) { TEST(OpRegistry, DefaultValue) {
paddle::framework::OpDesc op_desc; paddle::framework::proto::OpDesc op_desc;
op_desc.set_type("cos_sim"); op_desc.set_type("cos_sim");
BuildVar("input", {"aa"}, op_desc.add_inputs()); BuildVar("input", {"aa"}, op_desc.add_inputs());
BuildVar("output", {"bb"}, op_desc.add_outputs()); BuildVar("output", {"bb"}, op_desc.add_outputs());
...@@ -123,7 +123,7 @@ TEST(OpRegistry, DefaultValue) { ...@@ -123,7 +123,7 @@ TEST(OpRegistry, DefaultValue) {
} }
TEST(OpRegistry, CustomChecker) { TEST(OpRegistry, CustomChecker) {
paddle::framework::OpDesc op_desc; paddle::framework::proto::OpDesc op_desc;
op_desc.set_type("my_test_op"); op_desc.set_type("my_test_op");
BuildVar("input", {"ii"}, op_desc.add_inputs()); BuildVar("input", {"ii"}, op_desc.add_inputs());
BuildVar("output", {"oo"}, op_desc.add_outputs()); BuildVar("output", {"oo"}, op_desc.add_outputs());
...@@ -145,7 +145,7 @@ TEST(OpRegistry, CustomChecker) { ...@@ -145,7 +145,7 @@ TEST(OpRegistry, CustomChecker) {
// set 'test_attr' to an illegal value // set 'test_attr' to an illegal value
auto attr = op_desc.mutable_attrs()->Add(); auto attr = op_desc.mutable_attrs()->Add();
attr->set_name("test_attr"); attr->set_name("test_attr");
attr->set_type(paddle::framework::AttrType::INT); attr->set_type(paddle::framework::proto::AttrType::INT);
attr->set_i(3); attr->set_i(3);
caught = false; caught = false;
try { try {
...@@ -164,7 +164,7 @@ TEST(OpRegistry, CustomChecker) { ...@@ -164,7 +164,7 @@ TEST(OpRegistry, CustomChecker) {
op_desc.mutable_attrs()->Clear(); op_desc.mutable_attrs()->Clear();
attr = op_desc.mutable_attrs()->Add(); attr = op_desc.mutable_attrs()->Add();
attr->set_name("test_attr"); attr->set_name("test_attr");
attr->set_type(paddle::framework::AttrType::INT); attr->set_type(paddle::framework::proto::AttrType::INT);
attr->set_i(4); attr->set_i(4);
auto op = paddle::framework::OpRegistry::CreateOp(op_desc); auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
paddle::platform::CPUDeviceContext dev_ctx; paddle::platform::CPUDeviceContext dev_ctx;
......
...@@ -377,7 +377,7 @@ class RuntimeInferShapeContext : public InferShapeContext { ...@@ -377,7 +377,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
} }
} }
VarDesc::VarType GetVarType(const std::string& name) const override { proto::VarDesc::VarType GetVarType(const std::string& name) const override {
auto* var = scope_.FindVar(name); auto* var = scope_.FindVar(name);
return ToVarType(var->Type()); return ToVarType(var->Type());
} }
...@@ -417,7 +417,7 @@ OpKernelType OperatorWithKernel::GetKernelType( ...@@ -417,7 +417,7 @@ OpKernelType OperatorWithKernel::GetKernelType(
const ExecutionContext& ctx) const { const ExecutionContext& ctx) const {
return OpKernelType(IndicateDataType(ctx), ctx.GetPlace()); return OpKernelType(IndicateDataType(ctx), ctx.GetPlace());
} }
DataType OperatorWithKernel::IndicateDataType( proto::DataType OperatorWithKernel::IndicateDataType(
const ExecutionContext& ctx) const { const ExecutionContext& ctx) const {
auto& scope = ctx.scope(); auto& scope = ctx.scope();
int data_type = -1; int data_type = -1;
...@@ -443,7 +443,7 @@ DataType OperatorWithKernel::IndicateDataType( ...@@ -443,7 +443,7 @@ DataType OperatorWithKernel::IndicateDataType(
} }
} }
PADDLE_ENFORCE(data_type != -1, "DataType should be indicated by input"); PADDLE_ENFORCE(data_type != -1, "DataType should be indicated by input");
return static_cast<DataType>(data_type); return static_cast<proto::DataType>(data_type);
} }
} // namespace framework } // namespace framework
......
...@@ -358,12 +358,13 @@ struct OpKernelType { ...@@ -358,12 +358,13 @@ struct OpKernelType {
}; };
platform::Place place_; platform::Place place_;
DataType data_type_; proto::DataType data_type_;
OpKernelType(DataType data_type, platform::Place place) OpKernelType(proto::DataType data_type, platform::Place place)
: place_(place), data_type_(data_type) {} : place_(place), data_type_(data_type) {}
OpKernelType(DataType data_type, const platform::DeviceContext& dev_ctx) OpKernelType(proto::DataType data_type,
const platform::DeviceContext& dev_ctx)
: place_(dev_ctx.GetPlace()), data_type_(data_type) {} : place_(dev_ctx.GetPlace()), data_type_(data_type) {}
bool operator==(const OpKernelType& o) const { bool operator==(const OpKernelType& o) const {
...@@ -409,7 +410,7 @@ class OperatorWithKernel : public OperatorBase { ...@@ -409,7 +410,7 @@ class OperatorWithKernel : public OperatorBase {
private: private:
// indicate kernel DataType by input data. By default, all input data must be // indicate kernel DataType by input data. By default, all input data must be
// the same. // the same.
DataType IndicateDataType(const ExecutionContext& ctx) const; proto::DataType IndicateDataType(const ExecutionContext& ctx) const;
}; };
std::ostream& operator<<(std::ostream& os, const OpKernelType& kernel_key); std::ostream& operator<<(std::ostream& os, const OpKernelType& kernel_key);
......
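
`IndicateDataType`, whose return type becomes `proto::DataType` above, scans every input and insists they agree on a single data type before a kernel is selected. A simplified, self-contained sketch of that check, with a mock enum and the scope/tensor lookup details elided:

```cpp
#include <cassert>
#include <stdexcept>
#include <vector>

// Stand-in for proto::DataType (illustrative subset).
enum class DataType { FP32 = 0, FP64 = 1, INT32 = 2 };

// Sketch of the kernel data-type inference: all inputs must share one
// type, and at least one input must carry a type at all.
DataType IndicateDataType(const std::vector<DataType>& input_types) {
  int data_type = -1;
  for (DataType t : input_types) {
    int tmp = static_cast<int>(t);
    if (data_type != -1 && data_type != tmp) {
      throw std::runtime_error("DataType of inputs must be the same");
    }
    data_type = tmp;
  }
  if (data_type == -1) {
    throw std::runtime_error("DataType should be indicated by input");
  }
  return static_cast<DataType>(data_type);
}

int main() {
  assert(IndicateDataType({DataType::FP32, DataType::FP32}) == DataType::FP32);
  return 0;
}
```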
...@@ -58,7 +58,7 @@ class OpeWithoutKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker { ...@@ -58,7 +58,7 @@ class OpeWithoutKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
static void BuildVar(const std::string& param_name, static void BuildVar(const std::string& param_name,
std::initializer_list<const char*> arguments, std::initializer_list<const char*> arguments,
paddle::framework::OpDesc::Var* var) { paddle::framework::proto::OpDesc::Var* var) {
var->set_parameter(param_name); var->set_parameter(param_name);
for (auto& arg_name : arguments) { for (auto& arg_name : arguments) {
*var->mutable_arguments()->Add() = arg_name; *var->mutable_arguments()->Add() = arg_name;
...@@ -70,14 +70,14 @@ REGISTER_OP_WITHOUT_GRADIENT( ...@@ -70,14 +70,14 @@ REGISTER_OP_WITHOUT_GRADIENT(
paddle::framework::OpeWithoutKernelTestProtoAndCheckerMaker); paddle::framework::OpeWithoutKernelTestProtoAndCheckerMaker);
TEST(OperatorBase, all) { TEST(OperatorBase, all) {
paddle::framework::OpDesc op_desc; paddle::framework::proto::OpDesc op_desc;
op_desc.set_type("test_operator"); op_desc.set_type("test_operator");
BuildVar("input", {"IN1"}, op_desc.add_inputs()); BuildVar("input", {"IN1"}, op_desc.add_inputs());
BuildVar("output", {"OUT1"}, op_desc.add_outputs()); BuildVar("output", {"OUT1"}, op_desc.add_outputs());
auto attr = op_desc.mutable_attrs()->Add(); auto attr = op_desc.mutable_attrs()->Add();
attr->set_name("scale"); attr->set_name("scale");
attr->set_type(paddle::framework::AttrType::FLOAT); attr->set_type(paddle::framework::proto::AttrType::FLOAT);
attr->set_f(3.14); attr->set_f(3.14);
paddle::platform::CPUDeviceContext device_context; paddle::platform::CPUDeviceContext device_context;
...@@ -115,7 +115,7 @@ class OpWithKernelTest : public OperatorWithKernel { ...@@ -115,7 +115,7 @@ class OpWithKernelTest : public OperatorWithKernel {
protected: protected:
void InferShape(framework::InferShapeContext* ctx) const override {} void InferShape(framework::InferShapeContext* ctx) const override {}
OpKernelType GetKernelType(const ExecutionContext& ctx) const override { OpKernelType GetKernelType(const ExecutionContext& ctx) const override {
return OpKernelType(DataType::FP32, ctx.GetPlace()); return OpKernelType(proto::DataType::FP32, ctx.GetPlace());
} }
}; };
...@@ -195,14 +195,14 @@ REGISTER_OP_CPU_KERNEL(op_with_kernel, ...@@ -195,14 +195,14 @@ REGISTER_OP_CPU_KERNEL(op_with_kernel,
// test with single input // test with single input
TEST(OpKernel, all) { TEST(OpKernel, all) {
paddle::framework::OpDesc op_desc; paddle::framework::proto::OpDesc op_desc;
op_desc.set_type("op_with_kernel"); op_desc.set_type("op_with_kernel");
BuildVar("x", {"IN1"}, op_desc.add_inputs()); BuildVar("x", {"IN1"}, op_desc.add_inputs());
BuildVar("y", {"OUT1"}, op_desc.add_outputs()); BuildVar("y", {"OUT1"}, op_desc.add_outputs());
auto attr = op_desc.mutable_attrs()->Add(); auto attr = op_desc.mutable_attrs()->Add();
attr->set_name("scale"); attr->set_name("scale");
attr->set_type(paddle::framework::AttrType::FLOAT); attr->set_type(paddle::framework::proto::AttrType::FLOAT);
attr->set_f(3.14); attr->set_f(3.14);
paddle::platform::CPUDeviceContext cpu_device_context; paddle::platform::CPUDeviceContext cpu_device_context;
...@@ -224,7 +224,7 @@ REGISTER_OP_CPU_KERNEL(op_multi_inputs_with_kernel, ...@@ -224,7 +224,7 @@ REGISTER_OP_CPU_KERNEL(op_multi_inputs_with_kernel,
TEST(OpKernel, multi_inputs) { TEST(OpKernel, multi_inputs) {
using namespace paddle::framework; using namespace paddle::framework;
OpDesc op_desc; proto::OpDesc op_desc;
op_desc.set_type("op_multi_inputs_with_kernel"); op_desc.set_type("op_multi_inputs_with_kernel");
BuildVar("xs", {"x0", "x1", "x2"}, op_desc.add_inputs()); BuildVar("xs", {"x0", "x1", "x2"}, op_desc.add_inputs());
BuildVar("k", {"k0"}, op_desc.add_inputs()); BuildVar("k", {"k0"}, op_desc.add_inputs());
...@@ -232,7 +232,7 @@ TEST(OpKernel, multi_inputs) { ...@@ -232,7 +232,7 @@ TEST(OpKernel, multi_inputs) {
auto attr = op_desc.mutable_attrs()->Add(); auto attr = op_desc.mutable_attrs()->Add();
attr->set_name("scale"); attr->set_name("scale");
attr->set_type(paddle::framework::AttrType::FLOAT); attr->set_type(paddle::framework::proto::AttrType::FLOAT);
attr->set_f(3.14); attr->set_f(3.14);
paddle::platform::CPUDeviceContext cpu_device_context; paddle::platform::CPUDeviceContext cpu_device_context;
......
...@@ -26,7 +26,7 @@ BlockDescBind *ProgramDescBind::AppendBlock(const BlockDescBind &parent) { ...@@ -26,7 +26,7 @@ BlockDescBind *ProgramDescBind::AppendBlock(const BlockDescBind &parent) {
return blocks_.back().get(); return blocks_.back().get();
} }
ProgramDesc *ProgramDescBind::Proto() { proto::ProgramDesc *ProgramDescBind::Proto() {
for (auto &block : blocks_) { for (auto &block : blocks_) {
block->Flush(); block->Flush();
} }
...@@ -49,7 +49,7 @@ ProgramDescBind::ProgramDescBind(const ProgramDescBind &o) { ...@@ -49,7 +49,7 @@ ProgramDescBind::ProgramDescBind(const ProgramDescBind &o) {
} }
} }
ProgramDescBind::ProgramDescBind(const ProgramDesc &desc) { ProgramDescBind::ProgramDescBind(const proto::ProgramDesc &desc) {
desc_ = desc; desc_ = desc;
for (auto &block_desc : *desc_.mutable_blocks()) { for (auto &block_desc : *desc_.mutable_blocks()) {
blocks_.emplace_back(new BlockDescBind(this, &block_desc)); blocks_.emplace_back(new BlockDescBind(this, &block_desc));
......
...@@ -29,7 +29,7 @@ class ProgramDescBind { ...@@ -29,7 +29,7 @@ class ProgramDescBind {
public: public:
ProgramDescBind(); ProgramDescBind();
explicit ProgramDescBind(const ProgramDesc &desc); explicit ProgramDescBind(const proto::ProgramDesc &desc);
ProgramDescBind(const ProgramDescBind &o); ProgramDescBind(const ProgramDescBind &o);
...@@ -43,10 +43,10 @@ class ProgramDescBind { ...@@ -43,10 +43,10 @@ class ProgramDescBind {
size_t Size() const { return blocks_.size(); } size_t Size() const { return blocks_.size(); }
ProgramDesc *Proto(); proto::ProgramDesc *Proto();
private: private:
ProgramDesc desc_; proto::ProgramDesc desc_;
std::vector<std::unique_ptr<BlockDescBind>> blocks_; std::vector<std::unique_ptr<BlockDescBind>> blocks_;
}; };
......
...@@ -22,15 +22,15 @@ TEST(ProgramDesc, copy_ctor) { ...@@ -22,15 +22,15 @@ TEST(ProgramDesc, copy_ctor) {
ProgramDescBind program; ProgramDescBind program;
auto* global_block = program.MutableBlock(0); auto* global_block = program.MutableBlock(0);
auto* x = global_block->Var("X"); auto* x = global_block->Var("X");
x->SetType(VarDesc_VarType_LOD_TENSOR); x->SetType(proto::VarDesc_VarType_LOD_TENSOR);
x->SetLoDLevel(0); x->SetLoDLevel(0);
x->SetDataType(FP32); x->SetDataType(proto::FP32);
x->SetShape({1000, 784}); x->SetShape({1000, 784});
auto* y = global_block->Var("Y"); auto* y = global_block->Var("Y");
y->SetType(VarDesc_VarType_LOD_TENSOR); y->SetType(proto::VarDesc_VarType_LOD_TENSOR);
y->SetLoDLevel(0); y->SetLoDLevel(0);
y->SetDataType(FP32); y->SetDataType(proto::FP32);
y->SetShape({784, 100}); y->SetShape({784, 100});
auto* op = global_block->AppendOp(); auto* op = global_block->AppendOp();
...@@ -39,7 +39,7 @@ TEST(ProgramDesc, copy_ctor) { ...@@ -39,7 +39,7 @@ TEST(ProgramDesc, copy_ctor) {
op->SetInput("Y", {y->Name()}); op->SetInput("Y", {y->Name()});
auto* out = global_block->Var("Out"); auto* out = global_block->Var("Out");
out->SetType(VarDesc_VarType_LOD_TENSOR); out->SetType(proto::VarDesc_VarType_LOD_TENSOR);
op->SetOutput("Y", {out->Name()}); op->SetOutput("Y", {out->Name()});
ProgramDescBind program_copy(program); ProgramDescBind program_copy(program);
...@@ -84,15 +84,15 @@ TEST(ProgramDescBind, serialize_and_deserialize) { ...@@ -84,15 +84,15 @@ TEST(ProgramDescBind, serialize_and_deserialize) {
ProgramDescBind program_origin; ProgramDescBind program_origin;
auto* global_block = program_origin.MutableBlock(0); auto* global_block = program_origin.MutableBlock(0);
auto* x = global_block->Var("X"); auto* x = global_block->Var("X");
x->SetType(VarDesc_VarType_LOD_TENSOR); x->SetType(proto::VarDesc_VarType_LOD_TENSOR);
x->SetLoDLevel(0); x->SetLoDLevel(0);
x->SetDataType(FP32); x->SetDataType(proto::FP32);
x->SetShape({1000, 784}); x->SetShape({1000, 784});
auto* y = global_block->Var("Y"); auto* y = global_block->Var("Y");
y->SetType(VarDesc_VarType_LOD_TENSOR); y->SetType(proto::VarDesc_VarType_LOD_TENSOR);
y->SetLoDLevel(0); y->SetLoDLevel(0);
y->SetDataType(FP32); y->SetDataType(proto::FP32);
y->SetShape({784, 100}); y->SetShape({784, 100});
auto* op = global_block->AppendOp(); auto* op = global_block->AppendOp();
...@@ -101,7 +101,7 @@ TEST(ProgramDescBind, serialize_and_deserialize) { ...@@ -101,7 +101,7 @@ TEST(ProgramDescBind, serialize_and_deserialize) {
op->SetInput("Y", {y->Name()}); op->SetInput("Y", {y->Name()});
auto* out = global_block->Var("Out"); auto* out = global_block->Var("Out");
out->SetType(VarDesc_VarType_LOD_TENSOR); out->SetType(proto::VarDesc_VarType_LOD_TENSOR);
op->SetOutput("Y", {out->Name()}); op->SetOutput("Y", {out->Name()});
std::string binary_str; std::string binary_str;
......
...@@ -29,7 +29,7 @@ const std::string kFetchOpType = "fetch"; ...@@ -29,7 +29,7 @@ const std::string kFetchOpType = "fetch";
const std::string kDropOutOpType = "dropout"; const std::string kDropOutOpType = "dropout";
const std::string kBatchNormOpType = "batch_norm"; const std::string kBatchNormOpType = "batch_norm";
bool HasDependentVar(const OpDesc& op_desc, bool HasDependentVar(const proto::OpDesc& op_desc,
const std::set<std::string>& dependent_vars) { const std::set<std::string>& dependent_vars) {
for (auto& var : op_desc.outputs()) { for (auto& var : op_desc.outputs()) {
for (auto& argu : var.arguments()) { for (auto& argu : var.arguments()) {
...@@ -41,14 +41,15 @@ bool HasDependentVar(const OpDesc& op_desc, ...@@ -41,14 +41,15 @@ bool HasDependentVar(const OpDesc& op_desc,
return false; return false;
} }
bool IsTarget(const OpDesc& op_desc) { bool IsTarget(const proto::OpDesc& op_desc) {
if (op_desc.has_is_target()) { if (op_desc.has_is_target()) {
return op_desc.is_target(); return op_desc.is_target();
} }
return false; return false;
} }
void prune_impl(const ProgramDesc& input, ProgramDesc* output, int block_id) { void prune_impl(const proto::ProgramDesc& input, proto::ProgramDesc* output,
int block_id) {
// TODO(tonyyang-svail): // TODO(tonyyang-svail):
// - will change to use multiple blocks for RNN op and Cond Op // - will change to use multiple blocks for RNN op and Cond Op
...@@ -104,12 +105,12 @@ void prune_impl(const ProgramDesc& input, ProgramDesc* output, int block_id) { ...@@ -104,12 +105,12 @@ void prune_impl(const ProgramDesc& input, ProgramDesc* output, int block_id) {
} }
// TODO(fengjiayi): Prune() could be done in place to avoid unnecessary copies // TODO(fengjiayi): Prune() could be done in place to avoid unnecessary copies
void Prune(const ProgramDesc& input, ProgramDesc* output) { void Prune(const proto::ProgramDesc& input, proto::ProgramDesc* output) {
prune_impl(input, output, 0); prune_impl(input, output, 0);
} }
void inference_optimize_impl(const ProgramDesc& input, ProgramDesc* output, void inference_optimize_impl(const proto::ProgramDesc& input,
int block_id) { proto::ProgramDesc* output, int block_id) {
*output = input; *output = input;
auto* op_field = output->mutable_blocks(block_id)->mutable_ops(); auto* op_field = output->mutable_blocks(block_id)->mutable_ops();
for (auto& op_desc : *op_field) { for (auto& op_desc : *op_field) {
...@@ -125,7 +126,8 @@ void inference_optimize_impl(const ProgramDesc& input, ProgramDesc* output, ...@@ -125,7 +126,8 @@ void inference_optimize_impl(const ProgramDesc& input, ProgramDesc* output,
} }
} }
void InferenceOptimize(const ProgramDesc& input, ProgramDesc* output) { void InferenceOptimize(const proto::ProgramDesc& input,
proto::ProgramDesc* output) {
inference_optimize_impl(input, output, 0); inference_optimize_impl(input, output, 0);
} }
......
...@@ -20,9 +20,10 @@ limitations under the License. */ ...@@ -20,9 +20,10 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
void Prune(const ProgramDesc& input, ProgramDesc* output); void Prune(const proto::ProgramDesc& input, proto::ProgramDesc* output);
void InferenceOptimize(const ProgramDesc& input, ProgramDesc* output); void InferenceOptimize(const proto::ProgramDesc& input,
proto::ProgramDesc* output);
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
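
For readers following the `Prune` signature change: `prune_impl` walks the block's ops from back to front, keeping an op when it is marked as a target or when one of its outputs already feeds a kept op (`HasDependentVar`). A standalone sketch of that reverse scan over a mock op list, consistent with the test expectations below:

```cpp
#include <set>
#include <string>
#include <vector>

// Minimal stand-in for one op in a block (illustrative).
struct Op {
  std::vector<std::string> inputs, outputs;
  bool is_target = false;
};

// Sketch of the pruning core: scan ops in reverse, keep an op if it is a
// target or produces a variable that an already-kept op consumes.
std::vector<Op> Prune(const std::vector<Op>& ops) {
  std::set<std::string> dependent_vars;
  std::vector<bool> keep(ops.size(), false);
  for (int i = static_cast<int>(ops.size()) - 1; i >= 0; --i) {
    bool needed = ops[i].is_target;
    for (const auto& out : ops[i].outputs) {
      if (dependent_vars.count(out)) needed = true;
    }
    if (needed) {
      keep[i] = true;
      // Everything this op reads becomes required as well.
      for (const auto& in : ops[i].inputs) dependent_vars.insert(in);
    }
  }
  std::vector<Op> pruned;
  for (size_t i = 0; i < ops.size(); ++i) {
    if (keep[i]) pruned.push_back(ops[i]);
  }
  return pruned;
}
```

With a straight chain a -> b -> c -> d and op i marked as target, this keeps exactly ops 0..i, matching the `ops_size() == i + 1` assertions in `TEST(Prune, forward)`.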
...@@ -34,7 +34,7 @@ void AddOp(const std::string &type, const f::VariableNameMap &inputs, ...@@ -34,7 +34,7 @@ void AddOp(const std::string &type, const f::VariableNameMap &inputs,
for (auto kv : outputs) { for (auto kv : outputs) {
for (auto v : kv.second) { for (auto v : kv.second) {
auto var = block->Var(v); auto var = block->Var(v);
var->SetDataType(paddle::framework::DataType::FP32); var->SetDataType(paddle::framework::proto::DataType::FP32);
} }
} }
...@@ -57,14 +57,14 @@ TEST(Prune, one_operator) { ...@@ -57,14 +57,14 @@ TEST(Prune, one_operator) {
AddOp("one_one", {{"input", {"a"}}}, {{"output", {"b"}}}, f::AttributeMap{}, AddOp("one_one", {{"input", {"a"}}}, {{"output", {"b"}}}, f::AttributeMap{},
block); block);
f::ProgramDesc *pdesc = program.Proto(); f::proto::ProgramDesc *pdesc = program.Proto();
f::ProgramDesc pruned; f::proto::ProgramDesc pruned;
Prune(*pdesc, &pruned); f::Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 0); PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 0);
pdesc->mutable_blocks(0)->mutable_ops(0)->set_is_target(true); pdesc->mutable_blocks(0)->mutable_ops(0)->set_is_target(true);
Prune(*pdesc, &pruned); f::Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 1); PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 1);
} }
...@@ -81,12 +81,12 @@ TEST(Prune, forward) { ...@@ -81,12 +81,12 @@ TEST(Prune, forward) {
AddOp("one_one", {{"input", {"d"}}}, {{"output", {"e"}}}, f::AttributeMap{}, AddOp("one_one", {{"input", {"d"}}}, {{"output", {"e"}}}, f::AttributeMap{},
block); block);
f::ProgramDesc *pdesc = program.Proto(); f::proto::ProgramDesc *pdesc = program.Proto();
for (int i = 0; i < pdesc->blocks(0).ops_size(); ++i) { for (int i = 0; i < pdesc->blocks(0).ops_size(); ++i) {
f::ProgramDesc pruned; f::proto::ProgramDesc pruned;
pdesc->mutable_blocks(0)->mutable_ops(i)->set_is_target(true); pdesc->mutable_blocks(0)->mutable_ops(i)->set_is_target(true);
Prune(*pdesc, &pruned); f::Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), i + 1); PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), i + 1);
} }
} }
...@@ -104,11 +104,11 @@ TEST(Prune, multi_input_op) { ...@@ -104,11 +104,11 @@ TEST(Prune, multi_input_op) {
AddOp("three_one", {{"input", {"b0", "b1", "b2"}}}, {{"output", {"c"}}}, AddOp("three_one", {{"input", {"b0", "b1", "b2"}}}, {{"output", {"c"}}},
f::AttributeMap{}, block); f::AttributeMap{}, block);
f::ProgramDesc *pdesc = program.Proto(); f::proto::ProgramDesc *pdesc = program.Proto();
pdesc->mutable_blocks(0)->mutable_ops(3)->set_is_target(true); pdesc->mutable_blocks(0)->mutable_ops(3)->set_is_target(true);
f::ProgramDesc pruned; f::proto::ProgramDesc pruned;
Prune(*pdesc, &pruned); f::Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 4); PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 4);
} }
...@@ -123,11 +123,11 @@ TEST(Prune, multi_output_op) { ...@@ -123,11 +123,11 @@ TEST(Prune, multi_output_op) {
AddOp("one_one", {{"input", {"c"}}}, {{"output", {"c1"}}}, f::AttributeMap{}, AddOp("one_one", {{"input", {"c"}}}, {{"output", {"c1"}}}, f::AttributeMap{},
block); block);
f::ProgramDesc *pdesc = program.Proto(); f::proto::ProgramDesc *pdesc = program.Proto();
pdesc->mutable_blocks(0)->mutable_ops(2)->set_is_target(true); pdesc->mutable_blocks(0)->mutable_ops(2)->set_is_target(true);
f::ProgramDesc pruned; f::proto::ProgramDesc pruned;
Prune(*pdesc, &pruned); f::Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 2); PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 2);
} }
...@@ -142,11 +142,11 @@ TEST(Prune, multi_target) { ...@@ -142,11 +142,11 @@ TEST(Prune, multi_target) {
AddOp("one_one", {{"input", {"c"}}}, {{"output", {"c1"}}}, f::AttributeMap{}, AddOp("one_one", {{"input", {"c"}}}, {{"output", {"c1"}}}, f::AttributeMap{},
block); block);
f::ProgramDesc *pdesc = program.Proto(); f::proto::ProgramDesc *pdesc = program.Proto();
pdesc->mutable_blocks(0)->mutable_ops(1)->set_is_target(true); pdesc->mutable_blocks(0)->mutable_ops(1)->set_is_target(true);
pdesc->mutable_blocks(0)->mutable_ops(2)->set_is_target(true); pdesc->mutable_blocks(0)->mutable_ops(2)->set_is_target(true);
f::ProgramDesc pruned; f::proto::ProgramDesc pruned;
Prune(*pdesc, &pruned); f::Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 3); PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 3);
} }
...@@ -57,17 +57,17 @@ void InferShapeContext::SetDims(const std::vector<std::string> &names, ...@@ -57,17 +57,17 @@ void InferShapeContext::SetDims(const std::vector<std::string> &names,
SetDim(names[i], dims[i]); SetDim(names[i], dims[i]);
} }
} }
std::vector<VarDesc::VarType> InferShapeContext::GetInputsVarType( std::vector<proto::VarDesc::VarType> InferShapeContext::GetInputsVarType(
const std::string &name) const { const std::string &name) const {
return GetVarTypes(Inputs(name)); return GetVarTypes(Inputs(name));
} }
std::vector<VarDesc::VarType> InferShapeContext::GetOutputsVarType( std::vector<proto::VarDesc::VarType> InferShapeContext::GetOutputsVarType(
const std::string &name) const { const std::string &name) const {
return GetVarTypes(Outputs(name)); return GetVarTypes(Outputs(name));
} }
std::vector<VarDesc::VarType> InferShapeContext::GetVarTypes( std::vector<proto::VarDesc::VarType> InferShapeContext::GetVarTypes(
const std::vector<std::string> &names) const { const std::vector<std::string> &names) const {
std::vector<VarDesc::VarType> retv; std::vector<proto::VarDesc::VarType> retv;
retv.resize(names.size()); retv.resize(names.size());
std::transform(names.begin(), names.end(), retv.begin(), std::transform(names.begin(), names.end(), retv.begin(),
std::bind(std::mem_fn(&InferShapeContext::GetVarType), this, std::bind(std::mem_fn(&InferShapeContext::GetVarType), this,
......
...@@ -27,8 +27,9 @@ class InferShapeContext { ...@@ -27,8 +27,9 @@ class InferShapeContext {
virtual bool HasInput(const std::string &name) const = 0; virtual bool HasInput(const std::string &name) const = 0;
virtual bool HasOutput(const std::string &name) const = 0; virtual bool HasOutput(const std::string &name) const = 0;
std::vector<VarDesc::VarType> GetInputsVarType(const std::string &name) const; std::vector<proto::VarDesc::VarType> GetInputsVarType(
std::vector<VarDesc::VarType> GetOutputsVarType( const std::string &name) const;
std::vector<proto::VarDesc::VarType> GetOutputsVarType(
const std::string &name) const; const std::string &name) const;
virtual bool HasInputs(const std::string &name) const = 0; virtual bool HasInputs(const std::string &name) const = 0;
...@@ -65,10 +66,10 @@ class InferShapeContext { ...@@ -65,10 +66,10 @@ class InferShapeContext {
std::vector<framework::DDim> GetDims( std::vector<framework::DDim> GetDims(
const std::vector<std::string> &names) const; const std::vector<std::string> &names) const;
std::vector<VarDesc::VarType> GetVarTypes( std::vector<proto::VarDesc::VarType> GetVarTypes(
const std::vector<std::string> &names) const; const std::vector<std::string> &names) const;
virtual VarDesc::VarType GetVarType(const std::string &name) const = 0; virtual proto::VarDesc::VarType GetVarType(const std::string &name) const = 0;
}; };
} // namespace framework } // namespace framework
......
...@@ -18,15 +18,17 @@ limitations under the License. */ ...@@ -18,15 +18,17 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
VarDesc::VarType VarDescBind::GetType() const { return desc_.type(); } proto::VarDesc::VarType VarDescBind::GetType() const { return desc_.type(); }
void VarDescBind::SetType(VarDesc::VarType type) { desc_.set_type(type); } void VarDescBind::SetType(proto::VarDesc::VarType type) {
desc_.set_type(type);
}
void VarDescBind::SetShape(const std::vector<int64_t> &dims) { void VarDescBind::SetShape(const std::vector<int64_t> &dims) {
VectorToRepeated(dims, mutable_tensor_desc()->mutable_dims()); VectorToRepeated(dims, mutable_tensor_desc()->mutable_dims());
} }
void VarDescBind::SetDataType(DataType data_type) { void VarDescBind::SetDataType(proto::DataType data_type) {
mutable_tensor_desc()->set_data_type(data_type); mutable_tensor_desc()->set_data_type(data_type);
} }
...@@ -34,14 +36,16 @@ std::vector<int64_t> VarDescBind::Shape() const { ...@@ -34,14 +36,16 @@ std::vector<int64_t> VarDescBind::Shape() const {
return RepeatedToVector(tensor_desc().dims()); return RepeatedToVector(tensor_desc().dims());
} }
DataType VarDescBind::GetDataType() const { return tensor_desc().data_type(); } proto::DataType VarDescBind::GetDataType() const {
return tensor_desc().data_type();
}
void VarDescBind::SetLoDLevel(int32_t lod_level) { void VarDescBind::SetLoDLevel(int32_t lod_level) {
switch (desc_.type()) { switch (desc_.type()) {
case VarDesc::LOD_TENSOR: case proto::VarDesc::LOD_TENSOR:
desc_.mutable_lod_tensor()->set_lod_level(lod_level); desc_.mutable_lod_tensor()->set_lod_level(lod_level);
break; break;
case VarDesc::LOD_TENSOR_ARRAY: case proto::VarDesc::LOD_TENSOR_ARRAY:
desc_.mutable_tensor_array()->set_lod_level(lod_level); desc_.mutable_tensor_array()->set_lod_level(lod_level);
break; break;
default: default:
...@@ -52,9 +56,9 @@ void VarDescBind::SetLoDLevel(int32_t lod_level) { ...@@ -52,9 +56,9 @@ void VarDescBind::SetLoDLevel(int32_t lod_level) {
int32_t VarDescBind::GetLodLevel() const { int32_t VarDescBind::GetLodLevel() const {
switch (desc_.type()) { switch (desc_.type()) {
case VarDesc::LOD_TENSOR: case proto::VarDesc::LOD_TENSOR:
return desc_.lod_tensor().lod_level(); return desc_.lod_tensor().lod_level();
case VarDesc::LOD_TENSOR_ARRAY: case proto::VarDesc::LOD_TENSOR_ARRAY:
return desc_.tensor_array().lod_level(); return desc_.tensor_array().lod_level();
default: default:
PADDLE_THROW("Tensor type=%d does not support LoDLevel", PADDLE_THROW("Tensor type=%d does not support LoDLevel",
...@@ -62,29 +66,29 @@ int32_t VarDescBind::GetLodLevel() const { ...@@ -62,29 +66,29 @@ int32_t VarDescBind::GetLodLevel() const {
} }
} }
const TensorDesc &VarDescBind::tensor_desc() const { const proto::TensorDesc &VarDescBind::tensor_desc() const {
PADDLE_ENFORCE(desc_.has_type(), "invoke TensorDesc must after set type"); PADDLE_ENFORCE(desc_.has_type(), "invoke TensorDesc must after set type");
switch (desc_.type()) { switch (desc_.type()) {
case VarDesc::SELECTED_ROWS: case proto::VarDesc::SELECTED_ROWS:
return desc_.selected_rows(); return desc_.selected_rows();
case VarDesc::LOD_TENSOR: case proto::VarDesc::LOD_TENSOR:
return desc_.lod_tensor().tensor(); return desc_.lod_tensor().tensor();
case VarDesc::LOD_TENSOR_ARRAY: case proto::VarDesc::LOD_TENSOR_ARRAY:
return desc_.tensor_array().tensor(); return desc_.tensor_array().tensor();
default: default:
PADDLE_THROW("Unexpected branch."); PADDLE_THROW("Unexpected branch.");
} }
} }
TensorDesc *VarDescBind::mutable_tensor_desc() { proto::TensorDesc *VarDescBind::mutable_tensor_desc() {
PADDLE_ENFORCE(desc_.has_type(), PADDLE_ENFORCE(desc_.has_type(),
"invoke MutableTensorDesc must after set type"); "invoke MutableTensorDesc must after set type");
switch (desc_.type()) { switch (desc_.type()) {
case VarDesc::SELECTED_ROWS: case proto::VarDesc::SELECTED_ROWS:
return desc_.mutable_selected_rows(); return desc_.mutable_selected_rows();
case VarDesc::LOD_TENSOR: case proto::VarDesc::LOD_TENSOR:
return desc_.mutable_lod_tensor()->mutable_tensor(); return desc_.mutable_lod_tensor()->mutable_tensor();
case VarDesc::LOD_TENSOR_ARRAY: case proto::VarDesc::LOD_TENSOR_ARRAY:
return desc_.mutable_tensor_array()->mutable_tensor(); return desc_.mutable_tensor_array()->mutable_tensor();
default: default:
PADDLE_THROW("Unexpected branch."); PADDLE_THROW("Unexpected branch.");
......
...@@ -57,40 +57,40 @@ class VarDescBind { ...@@ -57,40 +57,40 @@ class VarDescBind {
public: public:
explicit VarDescBind(const std::string &name) { explicit VarDescBind(const std::string &name) {
desc_.set_name(name); desc_.set_name(name);
desc_.set_type(VarDesc::LOD_TENSOR); desc_.set_type(proto::VarDesc::LOD_TENSOR);
} }
explicit VarDescBind(const VarDesc &desc) : desc_(desc) {} explicit VarDescBind(const proto::VarDesc &desc) : desc_(desc) {}
VarDesc *Proto() { return &desc_; } proto::VarDesc *Proto() { return &desc_; }
std::string Name() const { return desc_.name(); } std::string Name() const { return desc_.name(); }
void SetShape(const std::vector<int64_t> &dims); void SetShape(const std::vector<int64_t> &dims);
void SetDataType(DataType data_type); void SetDataType(proto::DataType data_type);
std::vector<int64_t> Shape() const; std::vector<int64_t> Shape() const;
DataType GetDataType() const; proto::DataType GetDataType() const;
void SetLoDLevel(int32_t lod_level); void SetLoDLevel(int32_t lod_level);
int32_t GetLodLevel() const; int32_t GetLodLevel() const;
VarDesc::VarType GetType() const; proto::VarDesc::VarType GetType() const;
void SetType(VarDesc::VarType type); void SetType(proto::VarDesc::VarType type);
bool Persistable() const { return desc_.persistable(); } bool Persistable() const { return desc_.persistable(); }
void SetPersistable(bool persistable) { desc_.set_persistable(persistable); } void SetPersistable(bool persistable) { desc_.set_persistable(persistable); }
private: private:
const TensorDesc &tensor_desc() const; const proto::TensorDesc &tensor_desc() const;
TensorDesc *mutable_tensor_desc(); proto::TensorDesc *mutable_tensor_desc();
VarDesc desc_; proto::VarDesc desc_;
}; };
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -20,15 +20,15 @@ ...@@ -20,15 +20,15 @@
namespace paddle { namespace paddle {
namespace framework { namespace framework {
inline VarDesc::VarType ToVarType(std::type_index type) { inline proto::VarDesc::VarType ToVarType(std::type_index type) {
if (type.hash_code() == typeid(LoDTensor).hash_code()) { if (type.hash_code() == typeid(LoDTensor).hash_code()) {
return VarDesc_VarType_LOD_TENSOR; return proto::VarDesc_VarType_LOD_TENSOR;
} else if (type.hash_code() == typeid(LoDRankTable).hash_code()) { } else if (type.hash_code() == typeid(LoDRankTable).hash_code()) {
return VarDesc_VarType_LOD_RANK_TABLE; return proto::VarDesc_VarType_LOD_RANK_TABLE;
} else if (type.hash_code() == typeid(LoDTensorArray).hash_code()) { } else if (type.hash_code() == typeid(LoDTensorArray).hash_code()) {
return VarDesc_VarType_LOD_TENSOR_ARRAY; return proto::VarDesc_VarType_LOD_TENSOR_ARRAY;
} else if (type.hash_code() == typeid(SelectedRows).hash_code()) { } else if (type.hash_code() == typeid(SelectedRows).hash_code()) {
return VarDesc_VarType_SELECTED_ROWS; return proto::VarDesc_VarType_SELECTED_ROWS;
} else { } else {
PADDLE_THROW("ToVarType:Unsupported type %s", type.name()); PADDLE_THROW("ToVarType:Unsupported type %s", type.name());
} }
...@@ -37,16 +37,16 @@ inline VarDesc::VarType ToVarType(std::type_index type) { ...@@ -37,16 +37,16 @@ inline VarDesc::VarType ToVarType(std::type_index type) {
template <typename Visitor> template <typename Visitor>
inline void VisitVarType(const Variable& var, Visitor visitor) { inline void VisitVarType(const Variable& var, Visitor visitor) {
switch (ToVarType(var.Type())) { switch (ToVarType(var.Type())) {
case VarDesc_VarType_LOD_TENSOR: case proto::VarDesc_VarType_LOD_TENSOR:
visitor(var.Get<framework::LoDTensor>()); visitor(var.Get<framework::LoDTensor>());
return; return;
case VarDesc_VarType_LOD_RANK_TABLE: case proto::VarDesc_VarType_LOD_RANK_TABLE:
visitor(var.Get<LoDRankTable>()); visitor(var.Get<LoDRankTable>());
return; return;
case VarDesc_VarType_LOD_TENSOR_ARRAY: case proto::VarDesc_VarType_LOD_TENSOR_ARRAY:
visitor(var.Get<LoDTensorArray>()); visitor(var.Get<LoDTensorArray>());
return; return;
case VarDesc_VarType_SELECTED_ROWS: case proto::VarDesc_VarType_SELECTED_ROWS:
visitor(var.Get<SelectedRows>()); visitor(var.Get<SelectedRows>());
return; return;
default: default:
......
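
`VisitVarType` pairs the `ToVarType` mapping with a visitor functor, so callers get the concrete payload without writing their own switch over `proto::VarDesc::VarType`. A self-contained sketch of the dispatch pattern, with two mock variable types and `std::type_index` standing in for `Variable::Type()`:

```cpp
#include <iostream>
#include <typeindex>

struct LoDTensor { int rows = 0; };       // mock payload types
struct SelectedRows { int height = 0; };

// Sketch of the dispatch: map the runtime type to the right overload of
// the visitor, mirroring VisitVarType's switch on ToVarType(var.Type()).
template <typename Visitor>
void VisitVarType(std::type_index type, const void* data, Visitor visitor) {
  if (type == std::type_index(typeid(LoDTensor))) {
    visitor(*static_cast<const LoDTensor*>(data));
  } else if (type == std::type_index(typeid(SelectedRows))) {
    visitor(*static_cast<const SelectedRows*>(data));
  }
}

struct PrintVisitor {
  void operator()(const LoDTensor&) const { std::cout << "LoDTensor\n"; }
  void operator()(const SelectedRows&) const { std::cout << "SelectedRows\n"; }
};

int main() {
  LoDTensor t;
  VisitVarType(std::type_index(typeid(LoDTensor)), &t, PrintVisitor{});
  return 0;
}
```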
...@@ -36,14 +36,14 @@ class SumOpVarTypeInference : public VarTypeInference { ...@@ -36,14 +36,14 @@ class SumOpVarTypeInference : public VarTypeInference {
void operator()(const OpDescBind &op_desc, void operator()(const OpDescBind &op_desc,
BlockDescBind *block) const override { BlockDescBind *block) const override {
auto &inputs = op_desc.Input("X"); auto &inputs = op_desc.Input("X");
auto default_var_type = VarDesc::SELECTED_ROWS; auto default_var_type = proto::VarDesc::SELECTED_ROWS;
bool any_input_is_lod_tensor = std::any_of( bool any_input_is_lod_tensor = std::any_of(
inputs.begin(), inputs.end(), [block](const std::string &name) { inputs.begin(), inputs.end(), [block](const std::string &name) {
return block->Var(name)->GetType() == VarDesc::LOD_TENSOR; return block->Var(name)->GetType() == proto::VarDesc::LOD_TENSOR;
}); });
if (any_input_is_lod_tensor) { if (any_input_is_lod_tensor) {
default_var_type = VarDesc::LOD_TENSOR; default_var_type = proto::VarDesc::LOD_TENSOR;
} }
auto out_var_name = op_desc.Output("Out").front(); auto out_var_name = op_desc.Output("Out").front();
...@@ -68,19 +68,19 @@ TEST(InferVarType, sum_op) { ...@@ -68,19 +68,19 @@ TEST(InferVarType, sum_op) {
op->SetInput("X", {"test_a", "test_b", "test_c"}); op->SetInput("X", {"test_a", "test_b", "test_c"});
op->SetOutput("Out", {"test_out"}); op->SetOutput("Out", {"test_out"});
prog.MutableBlock(0)->Var("test_a")->SetType(VarDesc::SELECTED_ROWS); prog.MutableBlock(0)->Var("test_a")->SetType(proto::VarDesc::SELECTED_ROWS);
prog.MutableBlock(0)->Var("test_b")->SetType(VarDesc::SELECTED_ROWS); prog.MutableBlock(0)->Var("test_b")->SetType(proto::VarDesc::SELECTED_ROWS);
prog.MutableBlock(0)->Var("test_c")->SetType(VarDesc::SELECTED_ROWS); prog.MutableBlock(0)->Var("test_c")->SetType(proto::VarDesc::SELECTED_ROWS);
prog.MutableBlock(0)->Var("test_out"); prog.MutableBlock(0)->Var("test_out");
op->InferVarType(prog.MutableBlock(0)); op->InferVarType(prog.MutableBlock(0));
ASSERT_EQ(VarDesc::SELECTED_ROWS, ASSERT_EQ(proto::VarDesc::SELECTED_ROWS,
prog.MutableBlock(0)->Var("test_out")->GetType()); prog.MutableBlock(0)->Var("test_out")->GetType());
prog.MutableBlock(0)->Var("test_b")->SetType(VarDesc::LOD_TENSOR); prog.MutableBlock(0)->Var("test_b")->SetType(proto::VarDesc::LOD_TENSOR);
op->InferVarType(prog.MutableBlock(0)); op->InferVarType(prog.MutableBlock(0));
ASSERT_EQ(VarDesc::LOD_TENSOR, ASSERT_EQ(proto::VarDesc::LOD_TENSOR,
prog.MutableBlock(0)->Var("test_out")->GetType()); prog.MutableBlock(0)->Var("test_out")->GetType());
} }
...@@ -91,14 +91,14 @@ TEST(InferVarType, sum_op_without_infer_var_type) { ...@@ -91,14 +91,14 @@ TEST(InferVarType, sum_op_without_infer_var_type) {
op->SetInput("X", {"test2_a", "test2_b", "test2_c"}); op->SetInput("X", {"test2_a", "test2_b", "test2_c"});
op->SetOutput("Out", {"test2_out"}); op->SetOutput("Out", {"test2_out"});
prog.MutableBlock(0)->Var("test2_a")->SetType(VarDesc::SELECTED_ROWS); prog.MutableBlock(0)->Var("test2_a")->SetType(proto::VarDesc::SELECTED_ROWS);
prog.MutableBlock(0)->Var("test2_b")->SetType(VarDesc::SELECTED_ROWS); prog.MutableBlock(0)->Var("test2_b")->SetType(proto::VarDesc::SELECTED_ROWS);
prog.MutableBlock(0)->Var("test2_c")->SetType(VarDesc::SELECTED_ROWS); prog.MutableBlock(0)->Var("test2_c")->SetType(proto::VarDesc::SELECTED_ROWS);
prog.MutableBlock(0)->Var("test2_out"); prog.MutableBlock(0)->Var("test2_out");
op->InferVarType(prog.MutableBlock(0)); op->InferVarType(prog.MutableBlock(0));
ASSERT_EQ(VarDesc_VarType_LOD_TENSOR, ASSERT_EQ(proto::VarDesc_VarType_LOD_TENSOR,
prog.MutableBlock(0)->Var("test2_out")->GetType()); prog.MutableBlock(0)->Var("test2_out")->GetType());
} }
......
...@@ -126,6 +126,11 @@ public: ...@@ -126,6 +126,11 @@ public:
inputData += inputChannels * inputHeight * inputWidth; inputData += inputChannels * inputHeight * inputWidth;
outputData += outputChannels * outputHeight * outputWidth; outputData += outputChannels * outputHeight * outputWidth;
} }
#ifdef PADDLE_MOBILE_INFERENCE
if (Device == DEVICE_TYPE_CPU) {
memory_.reset();
}
#endif
} }
}; };
......
...@@ -84,12 +84,15 @@ void ROIPoolLayer::forward(PassType passType) { ...@@ -84,12 +84,15 @@ void ROIPoolLayer::forward(PassType passType) {
size_t poolChannelOffset = pooledHeight_ * pooledWidth_; size_t poolChannelOffset = pooledHeight_ * pooledWidth_;
real* outputData = outputValue->getData(); real* outputData = outputValue->getData();
Matrix::resizeOrCreate(maxIdxs_, real* argmaxData = nullptr;
numROIs, if (passType != PASS_TEST) {
channels_ * pooledHeight_ * pooledWidth_, Matrix::resizeOrCreate(maxIdxs_,
false, numROIs,
false); channels_ * pooledHeight_ * pooledWidth_,
real* argmaxData = maxIdxs_->getData(); false,
false);
argmaxData = maxIdxs_->getData();
}
for (size_t n = 0; n < numROIs; ++n) { for (size_t n = 0; n < numROIs; ++n) {
// the first five elements of each RoI should be: // the first five elements of each RoI should be:
...@@ -128,14 +131,18 @@ void ROIPoolLayer::forward(PassType passType) { ...@@ -128,14 +131,18 @@ void ROIPoolLayer::forward(PassType passType) {
bool isEmpty = (hend <= hstart) || (wend <= wstart); bool isEmpty = (hend <= hstart) || (wend <= wstart);
size_t poolIndex = ph * pooledWidth_ + pw; size_t poolIndex = ph * pooledWidth_ + pw;
outputData[poolIndex] = isEmpty ? 0 : -FLT_MAX; outputData[poolIndex] = isEmpty ? 0 : -FLT_MAX;
argmaxData[poolIndex] = -1; if (argmaxData) {
argmaxData[poolIndex] = -1;
}
for (size_t h = hstart; h < hend; ++h) { for (size_t h = hstart; h < hend; ++h) {
for (size_t w = wstart; w < wend; ++w) { for (size_t w = wstart; w < wend; ++w) {
size_t index = h * width_ + w; size_t index = h * width_ + w;
if (batchData[index] > outputData[poolIndex]) { if (batchData[index] > outputData[poolIndex]) {
outputData[poolIndex] = batchData[index]; outputData[poolIndex] = batchData[index];
argmaxData[poolIndex] = index; if (argmaxData) {
argmaxData[poolIndex] = index;
}
} }
} }
} }
...@@ -143,7 +150,9 @@ void ROIPoolLayer::forward(PassType passType) { ...@@ -143,7 +150,9 @@ void ROIPoolLayer::forward(PassType passType) {
} }
batchData += channelOffset; batchData += channelOffset;
outputData += poolChannelOffset; outputData += poolChannelOffset;
argmaxData += poolChannelOffset; if (argmaxData) {
argmaxData += poolChannelOffset;
}
} }
bottomROIs += roiOffset; bottomROIs += roiOffset;
} }
......
...@@ -171,12 +171,31 @@ void SequenceToBatch::sequence2BatchCopy(Matrix &batch, ...@@ -171,12 +171,31 @@ void SequenceToBatch::sequence2BatchCopy(Matrix &batch,
hl_sequence2batch_copy( hl_sequence2batch_copy(
batchData, seqData, idxData, seqWidth, batchCount, seq2batch); batchData, seqData, idxData, seqWidth, batchCount, seq2batch);
} else { } else {
for (int i = 0; i < batchCount; ++i) { if (seq2batch) {
if (seq2batch) { #ifdef PADDLE_USE_MKLML
const int blockMemSize = 8 * 1024;
const int blockSize = blockMemSize / sizeof(real);
#pragma omp parallel for collapse(2)
for (int i = 0; i < batchCount; ++i) {
for (int j = 0; j < seqWidth; j += blockSize) {
memcpy(batch.rowBuf(i) + j,
sequence.rowBuf(idxData[i]) + j,
(j + blockSize > seqWidth) ? (seqWidth - j) * sizeof(real)
: blockMemSize);
}
}
#else
for (int i = 0; i < batchCount; ++i) {
memcpy(batch.rowBuf(i), memcpy(batch.rowBuf(i),
sequence.rowBuf(idxData[i]), sequence.rowBuf(idxData[i]),
seqWidth * sizeof(real)); seqWidth * sizeof(real));
} else { }
#endif
} else {
#ifdef PADDLE_USE_MKLML
#pragma omp parallel for
#endif
for (int i = 0; i < batchCount; ++i) {
memcpy(sequence.rowBuf(idxData[i]), memcpy(sequence.rowBuf(idxData[i]),
batch.rowBuf(i), batch.rowBuf(i),
seqWidth * sizeof(real)); seqWidth * sizeof(real));
......
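
The MKLML branch above chops each row copy into 8 KB blocks so that `#pragma omp parallel for collapse(2)` gets a dense two-level iteration space to distribute; without blocking, a batch with few but wide rows would leave most threads idle. A standalone sketch of the same blocked gather (a float payload is assumed; `GatherRows` and its parameters are illustrative names):

```cpp
#include <cstring>
#include <vector>

// Copy rows src[idx[i]] into dst[i], splitting each row into fixed-size
// chunks so OpenMP can balance work across rows and chunks together.
void GatherRows(float* dst, const float* src, const int* idx,
                int rows, int width) {
  const int blockMemSize = 8 * 1024;                         // bytes per chunk
  const int blockSize = blockMemSize / static_cast<int>(sizeof(float));
#pragma omp parallel for collapse(2)
  for (int i = 0; i < rows; ++i) {
    for (int j = 0; j < width; j += blockSize) {
      // The last chunk of a row may be shorter than a full block.
      const int bytes = (j + blockSize > width)
                            ? (width - j) * static_cast<int>(sizeof(float))
                            : blockMemSize;
      std::memcpy(dst + i * width + j, src + idx[i] * width + j, bytes);
    }
  }
}
```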
...@@ -79,7 +79,7 @@ public: ...@@ -79,7 +79,7 @@ public:
#ifdef PADDLE_CUDA_FP16 #ifdef PADDLE_CUDA_FP16
HOSTDEVICE inline explicit float16(const half& h) { HOSTDEVICE inline explicit float16(const half& h) {
#if CUDA_VERSION >= 9000 #if CUDA_VERSION >= 9000
x = reinterpret_cast<__half_raw*>(&h)->x; x = reinterpret_cast<__half_raw*>(const_cast<half*>(&h))->x;
#else #else
x = h.x; x = h.x;
#endif // CUDA_VERSION >= 9000 #endif // CUDA_VERSION >= 9000
...@@ -145,7 +145,7 @@ public: ...@@ -145,7 +145,7 @@ public:
#ifdef PADDLE_CUDA_FP16 #ifdef PADDLE_CUDA_FP16
HOSTDEVICE inline float16& operator=(const half& rhs) { HOSTDEVICE inline float16& operator=(const half& rhs) {
#if CUDA_VERSION >= 9000 #if CUDA_VERSION >= 9000
x = reinterpret_cast<__half_raw*>(&rhs)->x; x = reinterpret_cast<__half_raw*>(const_cast<half*>(&rhs))->x;
#else #else
x = rhs.x; x = rhs.x;
#endif #endif
......
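
The `const_cast` fix is needed because CUDA 9.0 made `half` an opaque class: the bit pattern is reached through `__half_raw`, but `reinterpret_cast` cannot add mutability, so the `const half&` must be const-cast first. A host-only sketch of the issue with stand-in types (`half_raw` and `half` here are mocks, not the CUDA headers):

```cpp
struct half_raw { unsigned short x; };  // stand-in for CUDA's __half_raw
struct half { unsigned short data; };   // stand-in for CUDA 9's opaque half

unsigned short bits(const half& h) {
  // reinterpret_cast<half_raw*>(&h) would not compile: &h is a pointer to
  // const, so constness is removed first, then the raw bits are read.
  return reinterpret_cast<half_raw*>(const_cast<half*>(&h))->x;
}

int main() {
  half h{0x3c00};  // 1.0 in IEEE binary16
  return bits(h) == 0x3c00 ? 0 : 1;
}
```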
@@ -19,6 +19,7 @@ limitations under the License. */
 #include <stdlib.h>    // for malloc and free
 #include <sys/mman.h>  // for mlock and munlock
+#include <algorithm>   // for std::max
 #include "gflags/gflags.h"
@@ -28,7 +29,7 @@ limitations under the License. */
 // of memory available to the system for paging. So, by default, we
 // should set false to use_pinned_memory.
 DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory.");
+DECLARE_double(fraction_of_gpu_memory_to_use);
 namespace paddle {
 namespace memory {
 namespace detail {
@@ -77,45 +78,20 @@ void* GPUAllocator::Alloc(size_t& index, size_t size) {
   // CUDA documentation doesn't explain if cudaMalloc returns nullptr
   // if size is 0. We just make sure it does.
   if (size <= 0) return nullptr;
-  size_t available = 0;
-  size_t capacity = 0;
-  paddle::platform::GpuMemoryUsage(available, capacity);
-  // Reserve memory for page tables, etc.
-  size_t reserving = 0.05 * capacity + paddle::platform::GpuMinChunkSize();
-  size_t usable = available > reserving ? available - reserving : 0;
-  // If remaining size no less than expected size, using general
-  // cudaMalloc to allocate GPU memory.
-  void* p = 0;
-  if (size <= usable) {
-    cudaError_t result = cudaMalloc(&p, size);
-    if (result == cudaSuccess) {
-      index = 0;
-      gpu_alloc_size_ += size;
-      return p;
-    }
-  }
-  // If remaining size less than expected size or cudaMalloc failed,
-  // cudaMallocHost will be considered as a fallback allocator.
-  //
-  // NOTE: here, we use GpuMaxAllocSize() as the maximum memory size
-  // of host fallback allocation. Allocates too much would reduce
-  // the amount of memory available to the underlying system for paging.
-  usable = paddle::platform::GpuMaxAllocSize() - fallback_alloc_size_;
-  if (size > usable) return nullptr;
-  cudaError_t result = cudaMallocHost(&p, size);
+  void* p;
+  cudaError_t result = cudaMalloc(&p, size);
   if (result == cudaSuccess) {
-    index = 1;
-    fallback_alloc_size_ += size;
+    index = 0;
+    gpu_alloc_size_ += size;
     return p;
+  } else {
+    LOG(WARNING)
+        << "Cannot malloc " << size / 1024.0 / 1024.0
+        << " MB GPU memory. Please shrink FLAGS_fraction_of_gpu_memory_to_use "
+           "environment variable to a lower value. Current value is "
+        << FLAGS_fraction_of_gpu_memory_to_use;
+    return nullptr;
   }
-  return nullptr;
 }
 void GPUAllocator::Free(void* p, size_t size, size_t index) {
...
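Two things happen in the system_allocator hunk: GPUAllocator::Alloc stops estimating free memory and silently falling back to pinned host memory, and instead reports a failed cudaMalloc together with the current FLAGS_fraction_of_gpu_memory_to_use. The new DECLARE_double line is the gflags idiom for reading a flag that some other translation unit defines. A sketch of that split (the flag name is real; the file names and default value here are illustrative):

// flags.cc -- exactly one file owns the definition.
#include "gflags/gflags.h"
DEFINE_double(fraction_of_gpu_memory_to_use, 0.5,
              "Illustrative default; the real one is defined elsewhere.");

// system_allocator.cc -- consumers only declare the flag, then read it.
#include "gflags/gflags.h"
DECLARE_double(fraction_of_gpu_memory_to_use);

double CurrentFraction() { return FLAGS_fraction_of_gpu_memory_to_use; }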
@@ -63,8 +63,7 @@ class AccuracyOp : public framework::OperatorWithKernel {
 class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AccuracyOpMaker(framework::OpProto *proto,
-                  framework::OpAttrChecker *op_checker)
+  AccuracyOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     // TODO(typhoonzero): support both inference value and indices.
     AddInput("Out", "The network output of topk (inferences)");
...
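From here on, nearly every hunk makes the same mechanical edit: OpMaker constructors drop the framework:: qualifiers and take plain OpProto* / OpAttrChecker*, in line with the framework::proto::VarDesc and framework::proto::DataType renames that appear later in this commit. For the shorter spelling to compile, both names must be visible in the derived makers' scope, for example via an alias on the base class plus a using-declaration in the operators namespace. A sketch of one wiring that works (a sketch only, not the framework's actual headers):

namespace paddle {
namespace framework {
namespace proto {
class OpProto {};  // stand-in for the protobuf-generated message
}  // namespace proto

class OpAttrChecker {};

class OpProtoAndCheckerMaker {
 public:
  using OpProto = proto::OpProto;  // visible to every derived maker
  OpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) {}
};
}  // namespace framework

namespace operators {
using framework::OpAttrChecker;  // short name for the checker type

class MyOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  MyOpMaker(OpProto* proto, OpAttrChecker* op_checker)  // both names resolve
      : framework::OpProtoAndCheckerMaker(proto, op_checker) {}
};
}  // namespace operators
}  // namespace paddle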
@@ -26,7 +26,7 @@ template <int BlockSize>
 __global__ void AccuracyCudaKernel(const int N, const int D,
                                    const int64_t* Xdata,
                                    const int64_t* labeldata, int* correct_data,
-                                   float* accuracy) {
+                                   float* accuracy, int* total_data) {
   int count = 0;
   __shared__ int total[BlockSize];
@@ -47,6 +47,7 @@ __global__ void AccuracyCudaKernel(const int N, const int D,
   if (threadIdx.x == 0) {
     *correct_data = result;
     *accuracy = static_cast<float>(result) / static_cast<float>(N);
+    *total_data = N;
   }
 }
@@ -80,22 +81,11 @@ class AccuracyOpCUDAKernel : public framework::OpKernel<T> {
     if (num_samples == 0) {
       return;
     }
-    platform::GpuMemcpyAsync(total_data, &num_samples, sizeof(int),
-                             cudaMemcpyHostToDevice, stream);
     AccuracyCudaKernel<
         PADDLE_CUDA_NUM_THREADS><<<1, PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
         num_samples, infer_width, indices_data, label_data, correct_data,
-        accuracy_data);
-    int d_num_samples, d_num_correct;
-    float d_accuracy;
-    platform::GpuMemcpyAsync(&d_num_correct, correct_data, sizeof(int),
-                             cudaMemcpyDeviceToHost, stream);
-    platform::GpuMemcpyAsync(&d_num_samples, total_data, sizeof(int),
-                             cudaMemcpyDeviceToHost, stream);
-    platform::GpuMemcpyAsync(&d_accuracy, accuracy_data, sizeof(float),
-                             cudaMemcpyDeviceToHost, stream);
+        accuracy_data, total_data);
   }
 };
...
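The accuracy change removes four stream copies: the kernel now writes the sample count itself, and the three GpuMemcpyAsync readbacks were dead code in this function anyway (d_num_correct, d_num_samples and d_accuracy were filled but never used). When a caller really does want a metric on the host, one explicit, synchronized copy suffices; a hedged sketch:

#include <cuda_runtime.h>

// Fetch one device-resident scalar on demand (illustrative helper).
float readScalar(const float* dev_ptr, cudaStream_t stream) {
  float host_value = 0.0f;
  cudaMemcpyAsync(&host_value, dev_ptr, sizeof(float),
                  cudaMemcpyDeviceToHost, stream);
  cudaStreamSynchronize(stream);  // the async copy must finish before reading
  return host_value;
}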
@@ -38,9 +38,8 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
 class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  SigmoidOpMaker(framework::OpProto *proto,
-                 framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Sigmoid operator");
     AddOutput("Y", "Output of Sigmoid operator");
     AddComment(R"DOC(
@@ -54,9 +53,8 @@ $$y = \frac{1}{1 + e^{-x}}$$
 class LogSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  LogSigmoidOpMaker(framework::OpProto *proto,
-                    framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  LogSigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of LogSigmoid operator");
     AddOutput("Y", "Output of LogSigmoid operator");
     AddComment(R"DOC(
@@ -70,8 +68,8 @@ $$y = \log \frac{1}{1 + e^{-x}}$$
 class ExpOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ExpOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  ExpOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Exp operator");
     AddOutput("Y", "Output of Exp operator");
     AddComment(R"DOC(
@@ -85,8 +83,8 @@ $y = e^x$
 class ReluOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  ReluOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Relu operator");
     AddOutput("Y", "Output of Relu operator");
     AddComment(R"DOC(
@@ -100,9 +98,8 @@ $y = \max(x, 0)$
 class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  LeakyReluOpMaker(framework::OpProto *proto,
-                   framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  LeakyReluOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of LeakyRelu operator");
     AddOutput("Y", "Output of LeakyRelu operator");
     AddAttr<float>("alpha", "The small negative slope").SetDefault(0.02f);
@@ -117,9 +114,8 @@ $y = \max(x, \alpha * x)$
 class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  SoftShrinkOpMaker(framework::OpProto *proto,
-                    framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  SoftShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Softshrink operator");
     AddOutput("Y", "Output of Softshrink operator");
     AddAttr<float>("lambda", "non-negative offset").SetDefault(0.5f);
@@ -140,8 +136,8 @@ $$
 class TanhOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  TanhOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  TanhOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Tanh operator");
     AddOutput("Y", "Output of Tanh operator");
     AddComment(R"DOC(
@@ -155,9 +151,8 @@ $$y = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
 class TanhShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  TanhShrinkOpMaker(framework::OpProto *proto,
-                    framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  TanhShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of TanhShrink operator");
     AddOutput("Y", "Output of TanhShrink operator");
     AddComment(R"DOC(
@@ -171,9 +166,8 @@ $$y = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
 class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  HardShrinkOpMaker(framework::OpProto *proto,
-                    framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  HardShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of HardShrink operator");
     AddOutput("Y", "Output of HardShrink operator");
     AddAttr<float>("threshold", "The value of threshold for HardShrink")
@@ -195,8 +189,8 @@ $$
 class SqrtOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  SqrtOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  SqrtOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Sqrt operator");
     AddOutput("Y", "Output of Sqrt operator");
     AddComment(R"DOC(
@@ -210,8 +204,8 @@ $y = \sqrt{x}$
 class AbsOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AbsOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  AbsOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Abs operator");
     AddOutput("Y", "Output of Abs operator");
     AddComment(R"DOC(
@@ -225,8 +219,8 @@ $y = |x|$
 class CeilOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  CeilOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  CeilOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Ceil operator");
     AddOutput("Y", "Output of Ceil operator");
     AddComment(R"DOC(
@@ -240,8 +234,8 @@ $y = ceil(x)$
 class FloorOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  FloorOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  FloorOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Floor operator");
     AddOutput("Y", "Output of Floor operator");
     AddComment(R"DOC(
@@ -255,8 +249,8 @@ $y = floor(x)$
 class RoundOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  RoundOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  RoundOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Round operator");
     AddOutput("Y", "Output of Round operator");
     AddComment(R"DOC(
@@ -270,9 +264,8 @@ $y = [x]$
 class ReciprocalOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ReciprocalOpMaker(framework::OpProto *proto,
-                    framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  ReciprocalOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Reciprocal operator");
     AddOutput("Y", "Output of Reciprocal operator");
     AddComment(R"DOC(
@@ -286,8 +279,8 @@ $$y = \frac{1}{x}$$
 class LogOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  LogOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  LogOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Log operator");
     AddOutput("Y", "Output of Log operator");
     AddComment(R"DOC(
@@ -303,8 +296,8 @@ Natural logarithm of x.
 class SquareOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  SquareOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  SquareOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Square operator");
     AddOutput("Y", "Output of Square operator");
     AddComment(R"DOC(
@@ -318,9 +311,8 @@ $y = x^2$
 class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  SoftplusOpMaker(framework::OpProto *proto,
-                  framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  SoftplusOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Softplus operator");
     AddOutput("Y", "Output of Softplus operator");
     AddComment(R"DOC(
@@ -334,9 +326,8 @@ $y = \ln(1 + e^{x})$
 class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  SoftsignOpMaker(framework::OpProto *proto,
-                  framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  SoftsignOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Softsign operator");
     AddOutput("Y", "Output of Softsign operator");
     AddComment(R"DOC(
@@ -350,8 +341,8 @@ $$y = \frac{x}{1 + |x|}$$
 class BReluOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  BReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  BReluOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of BRelu operator");
     AddOutput("Y", "Output of BRelu operator");
     AddAttr<float>("t_min", "The min marginal value of BRelu")
@@ -369,9 +360,8 @@ $y = \max(\min(x, t_{min}), t_{max})$
 class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  SoftReluOpMaker(framework::OpProto *proto,
-                  framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  SoftReluOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of SoftRelu operator");
     AddOutput("Y", "Output of SoftRelu operator");
     AddAttr<float>("threshold", "The threshold value of SoftRelu")
@@ -387,8 +377,8 @@ $y = \ln(1 + \exp(\max(\min(x, threshold), threshold))$
 class ELUOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ELUOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  ELUOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of ELU operator");
     AddOutput("Y", "Output of ELU operator");
     AddAttr<float>("alpha", "The alpha value of ELU").SetDefault(1.0f);
@@ -406,8 +396,8 @@ $y = \max(0, x) + \min(0, \alpha * (e^x - 1))$
 class Relu6OpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  Relu6OpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  Relu6OpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Relu6 operator");
     AddOutput("Y", "Output of Relu6 operator");
     AddAttr<float>("threshold", "The threshold value of Relu6")
@@ -423,8 +413,8 @@ $y = \min(\max(0, x), 6)$
 class PowOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  PowOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  PowOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Pow operator");
     AddOutput("Y", "Output of Pow operator");
     AddAttr<float>("factor", "The exponential factor of Pow").SetDefault(1.0f);
@@ -439,8 +429,8 @@ $y = x^{factor}$
 class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  STanhOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  STanhOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of STanh operator");
     AddOutput("Y", "Output of STanh operator");
     AddAttr<float>("scale_a", "The scale parameter of a for the input")
@@ -458,9 +448,8 @@ $$y = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
 class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ThresholdedReluOpMaker(framework::OpProto *proto,
-                         framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  ThresholdedReluOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of ThresholdedRelu operator");
     AddOutput("Y", "Output of ThresholdedRelu operator");
     AddAttr<float>("threshold", "The threshold location of activation")
@@ -481,9 +470,8 @@ $$
 class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  HardSigmoidOpMaker(framework::OpProto *proto,
-                     framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  HardSigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of HardSigmoid operator");
     AddOutput("Y", "Output of HardSigmoid operator");
     AddAttr<float>("slope", "Slope for linear approximation of sigmoid")
@@ -508,8 +496,8 @@ It is recommended to use the defaults for this activation.
 class SwishOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  SwishOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  SwishOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input of Swish operator");
     AddOutput("Y", "Output of Swish operator");
     AddAttr<float>("beta", "Constant beta of swish operator").SetDefault(1.0f);
...
@@ -59,8 +59,7 @@ class AdadeltaOp : public framework::OperatorWithKernel {
 class AdadeltaOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AdadeltaOpMaker(framework::OpProto *proto,
-                  framework::OpAttrChecker *op_checker)
+  AdadeltaOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Param", "(Tensor) Input parameter");
     AddInput("Grad", "(Tensor) Input gradient");
...
@@ -59,8 +59,7 @@ class AdagradOp : public framework::OperatorWithKernel {
 class AdagradOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AdagradOpMaker(framework::OpProto* proto,
-                 framework::OpAttrChecker* op_checker)
+  AdagradOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Param", "(Tensor) Input parameter");
     AddInput("Grad", "(Tensor) Input gradient");
...
@@ -73,7 +73,7 @@ class AdamOp : public framework::OperatorWithKernel {
 class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AdamOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+  AdamOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Param", "(Tensor) Input parameter");
     AddInput("Grad", "(Tensor) Input gradient");
...
@@ -67,7 +67,7 @@ class AdamaxOp : public framework::OperatorWithKernel {
 class AdamaxOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AdamaxOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+  AdamaxOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Param", "(Tensor) Input parameter");
     AddInput("Grad", "(Tensor) Input gradient");
...
@@ -114,8 +114,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
 class ArrayToLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ArrayToLoDTensorOpProtoMaker(framework::OpProto *proto,
-                               framework::OpAttrChecker *op_checker)
+  ArrayToLoDTensorOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
              "(std::vector<LodTensor>) A vector of tensors that is going to "
...
@@ -86,8 +86,7 @@ class AssignOp : public framework::OperatorBase {
 class AssignOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AssignOpProtoMaker(framework::OpProto *proto,
-                     framework::OpAttrChecker *op_checker)
+  AssignOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
              "(LoDTensor, SelectedRows or LoDTensorArray) The input variable "
@@ -109,8 +108,8 @@ class AssignInferShape : public framework::InferShapeBase {
   void operator()(framework::InferShapeContext *context) const override {
     if (context->HasInput("X")) {
       auto type = context->GetInputsVarType("X")[0];
-      if (type == framework::VarDesc_VarType_SELECTED_ROWS ||
-          type == framework::VarDesc_VarType_LOD_TENSOR) {
+      if (type == framework::proto::VarDesc_VarType_SELECTED_ROWS ||
+          type == framework::proto::VarDesc_VarType_LOD_TENSOR) {
         context->SetOutputDim("Out", context->GetInputDim("X"));
       }
     }
...
@@ -49,7 +49,7 @@ class AucOp : public framework::OperatorWithKernel {
 class AucOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AucOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+  AucOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Out",
              "A floating point 2D tensor, values are in the range [0, 1]."
...
@@ -85,8 +85,7 @@ class BatchNormOp : public framework::OperatorWithKernel {
 class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  BatchNormOpMaker(framework::OpProto *proto,
-                   framework::OpAttrChecker *op_checker)
+  BatchNormOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddAttr<bool>("is_test", "").SetDefault(false);
     AddAttr<float>("momentum", "").SetDefault(0.9);
...
@@ -83,9 +83,8 @@ class BeamSearchDecodeOp : public framework::OperatorBase {
 class BeamSearchDecodeOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  BeamSearchDecodeOpProtoMaker(framework::OpProto* proto,
-                               framework::OpAttrChecker* op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
+  BeamSearchDecodeOpProtoMaker(OpProto* proto, OpAttrChecker* op_checker)
+      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Ids",
              "(LodTensorArray)"
              "score of the candidate words in each step");
@@ -123,10 +122,10 @@ class BeamSearchDecodeInferVarType : public framework::VarTypeInference {
   void operator()(const framework::OpDescBind& op_desc,
                   framework::BlockDescBind* block) const override {
     for (auto& o : op_desc.Output("SentenceIds")) {
-      block->Var(o)->SetType(framework::VarDesc::LOD_TENSOR);
+      block->Var(o)->SetType(framework::proto::VarDesc::LOD_TENSOR);
     }
     for (auto& o : op_desc.Output("SentenceScores")) {
-      block->Var(o)->SetType(framework::VarDesc::LOD_TENSOR);
+      block->Var(o)->SetType(framework::proto::VarDesc::LOD_TENSOR);
     }
   }
 };
...
@@ -153,8 +153,7 @@ bool BeamSearch::NextItemSet(std::vector<BeamSearch::Item> *items) {
 class BeamSearchProtoAndCheckerMaker
     : public framework::OpProtoAndCheckerMaker {
  public:
-  BeamSearchProtoAndCheckerMaker(framework::OpProto *proto,
-                                 framework::OpAttrChecker *op_checker)
+  BeamSearchProtoAndCheckerMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     // inputs and outputs stored in proto
     AddInput("pre_ids", "ids in previous step");
...
@@ -65,8 +65,7 @@ class BilinearTensorProductOp : public framework::OperatorWithKernel {
 class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  BilinearTensorProductOpMaker(framework::OpProto* proto,
-                               framework::OpAttrChecker* op_checker)
+  BilinearTensorProductOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The first input of bilinear_tensor_product operator.");
     AddInput("Y", "The second input of bilinear_tensor_product operator.");
...
@@ -20,8 +20,7 @@ namespace operators {
 class CastOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  CastOpProtoMaker(framework::OpProto *proto,
-                   framework::OpAttrChecker *op_checker)
+  CastOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The input tensor of cast op");
     AddOutput("Out", "The output tensor of cast op");
...
@@ -55,7 +55,7 @@ class CastOpKernel : public framework::OpKernel<InT> {
     auto* in = context.Input<framework::Tensor>("X");
     auto* out = context.Output<framework::Tensor>("Out");
     framework::VisitDataType(
-        static_cast<framework::DataType>(context.Attr<int>("out_dtype")),
+        static_cast<framework::proto::DataType>(context.Attr<int>("out_dtype")),
         CastOpFunctor<DeviceContext, InT>(
             in, out, context.template device_context<DeviceContext>()));
   }
...
@@ -32,6 +32,13 @@ class ChunkEvalOp : public framework::OperatorWithKernel {
                    "Output(Recall) of ChunkEvalOp should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("F1-Score"),
                    "Output(F1-Score) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("NumInferChunks"),
+                   "Output(NumInferChunks) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("NumLabelChunks"),
+                   "Output(NumLabelChunks) of ChunkEvalOp should not be null.");
+    PADDLE_ENFORCE(
+        ctx->HasOutput("NumCorrectChunks"),
+        "Output(NumCorrectChunks) of ChunkEvalOp should not be null.");
     auto inference_dim = ctx->GetInputDim("Inference");
     auto label_dim = ctx->GetInputDim("Label");
@@ -42,20 +49,22 @@ class ChunkEvalOp : public framework::OperatorWithKernel {
     ctx->SetOutputDim("Precision", {1});
     ctx->SetOutputDim("Recall", {1});
     ctx->SetOutputDim("F1-Score", {1});
+    ctx->SetOutputDim("NumInferChunks", {1});
+    ctx->SetOutputDim("NumLabelChunks", {1});
+    ctx->SetOutputDim("NumCorrectChunks", {1});
   }
  protected:
   framework::OpKernelType GetKernelType(
       const framework::ExecutionContext &ctx) const override {
-    return framework::OpKernelType(framework::DataType::FP32,
+    return framework::OpKernelType(framework::proto::DataType::FP32,
                                    ctx.device_context());
   }
 };
 class ChunkEvalOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ChunkEvalOpMaker(framework::OpProto *proto,
-                   framework::OpAttrChecker *op_checker)
+  ChunkEvalOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Inference",
              "(Tensor, default: Tensor<int64_t>). "
@@ -70,6 +79,16 @@ class ChunkEvalOpMaker : public framework::OpProtoAndCheckerMaker {
              "sensitivity) of chunks on the given mini-batch.");
     AddOutput("F1-Score",
               "(float). The evaluated F1-Score on the given mini-batch.");
+    AddOutput("NumInferChunks",
+              "(int64_t). The number of chunks in Inference on the given "
+              "mini-batch.");
+    AddOutput(
+        "NumLabelChunks",
+        "(int64_t). The number of chunks in Label on the given mini-batch.");
+    AddOutput(
+        "NumCorrectChunks",
+        "(int64_t). The number of chunks both in Inference and Label on the "
+        "given mini-batch.");
     AddAttr<int>("num_chunk_types",
                  "(int). The number of chunk type. See below for details.");
     AddAttr<std::string>(
...
@@ -111,9 +111,7 @@ class ChunkEvalKernel : public framework::OpKernel<T> {
     std::vector<Segment> label_segments;
     std::vector<Segment> output_segments;
     std::set<int> excluded_chunk_types;
-    int64_t num_output_segments = 0;
-    int64_t num_label_segments = 0;
-    int64_t num_correct = 0;
     if (context.Attr<std::string>("chunk_scheme") == "IOB") {
       num_tag_types = 2;
       tag_begin = 0;
@@ -151,12 +149,24 @@ class ChunkEvalKernel : public framework::OpKernel<T> {
     auto* precision = context.Output<Tensor>("Precision");
     auto* recall = context.Output<Tensor>("Recall");
     auto* f1 = context.Output<Tensor>("F1-Score");
+    auto* num_infer_chunks = context.Output<Tensor>("NumInferChunks");
+    auto* num_label_chunks = context.Output<Tensor>("NumLabelChunks");
+    auto* num_correct_chunks = context.Output<Tensor>("NumCorrectChunks");
     const int64_t* inference_data = inference->data<int64_t>();
     const int64_t* label_data = label->data<int64_t>();
     T* precision_data = precision->mutable_data<T>(context.GetPlace());
     T* racall_data = recall->mutable_data<T>(context.GetPlace());
     T* f1_data = f1->mutable_data<T>(context.GetPlace());
+    int64_t* num_infer_chunks_data =
+        num_infer_chunks->mutable_data<int64_t>(context.GetPlace());
+    int64_t* num_label_chunks_data =
+        num_label_chunks->mutable_data<int64_t>(context.GetPlace());
+    int64_t* num_correct_chunks_data =
+        num_correct_chunks->mutable_data<int64_t>(context.GetPlace());
+    *num_infer_chunks_data = 0;
+    *num_label_chunks_data = 0;
+    *num_correct_chunks_data = 0;
     auto lod = label->lod();
     PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
@@ -166,17 +176,23 @@ class ChunkEvalKernel : public framework::OpKernel<T> {
     for (int i = 0; i < num_sequences; ++i) {
       int seq_length = lod[0][i + 1] - lod[0][i];
       EvalOneSeq(inference_data + lod[0][i], label_data + lod[0][i], seq_length,
-                 output_segments, label_segments, num_output_segments,
-                 num_label_segments, num_correct, num_chunk_types,
-                 num_tag_types, other_chunk_type, tag_begin, tag_inside,
-                 tag_end, tag_single, excluded_chunk_types);
+                 output_segments, label_segments, *num_infer_chunks_data,
+                 *num_label_chunks_data, *num_correct_chunks_data,
+                 num_chunk_types, num_tag_types, other_chunk_type, tag_begin,
+                 tag_inside, tag_end, tag_single, excluded_chunk_types);
     }
-    *precision_data = !num_output_segments ? 0 : static_cast<T>(num_correct) /
-                                                     num_output_segments;
-    *racall_data = !num_label_segments ? 0 : static_cast<T>(num_correct) /
-                                                 num_label_segments;
-    *f1_data = !num_correct ? 0 : 2 * (*precision_data) * (*racall_data) /
-                                      ((*precision_data) + (*racall_data));
+    *precision_data = !(*num_infer_chunks_data)
+                          ? 0
+                          : static_cast<T>(*num_correct_chunks_data) /
+                                (*num_infer_chunks_data);
+    *racall_data = !(*num_label_chunks_data)
+                       ? 0
+                       : static_cast<T>(*num_correct_chunks_data) /
+                             (*num_label_chunks_data);
+    *f1_data = !(*num_correct_chunks_data)
+                   ? 0
+                   : 2 * (*precision_data) * (*racall_data) /
+                         ((*precision_data) + (*racall_data));
   }
   void EvalOneSeq(const int64_t* output, const int64_t* label, int length,
...
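The point of the three new chunk_eval outputs above: per-batch precision, recall and F1 cannot simply be averaged into corpus-level metrics, but raw chunk counts can be summed. Exposing NumInferChunks, NumLabelChunks and NumCorrectChunks lets an evaluator accumulate across mini-batches and derive the metrics once at the end. A small sketch of that aggregation (illustrative code, not part of the operator):

#include <cstdint>

struct ChunkStats {
  int64_t num_infer = 0, num_label = 0, num_correct = 0;

  // Feed the three counters the op emits for each mini-batch.
  void update(int64_t infer, int64_t label, int64_t correct) {
    num_infer += infer;
    num_label += label;
    num_correct += correct;
  }
  double precision() const {
    return num_infer ? static_cast<double>(num_correct) / num_infer : 0.0;
  }
  double recall() const {
    return num_label ? static_cast<double>(num_correct) / num_label : 0.0;
  }
  double f1() const {
    const double p = precision(), r = recall();
    return (p + r) > 0 ? 2 * p * r / (p + r) : 0.0;
  }
};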
@@ -37,8 +37,7 @@ class ClipByNormOp : public framework::OperatorWithKernel {
 class ClipByNormOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ClipByNormOpMaker(framework::OpProto* proto,
-                    framework::OpAttrChecker* op_checker)
+  ClipByNormOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
              "(Tensor) The input of clip_by_norm op."
...
@@ -38,7 +38,7 @@ class ClipOp : public framework::OperatorWithKernel {
 template <typename AttrType>
 class ClipOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ClipOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+  ClipOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
              "(Tensor)The input of clip op."
...
@@ -20,8 +20,7 @@ namespace operators {
 template <typename OpComment>
 class CompareOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  CompareOpProtoMaker(framework::OpProto *proto,
-                      framework::OpAttrChecker *op_checker)
+  CompareOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     OpComment comment;
     AddInput("X",
...
@@ -58,7 +58,7 @@ class ConcatOp : public framework::OperatorWithKernel {
 class ConcatOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ConcatOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+  ConcatOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "Input tensors of concat operator.").AsDuplicable();
     AddOutput("Out", "Output tensor of concat operator.");
...
@@ -205,8 +205,7 @@ void CondOp::Run(const Scope& scope,
 class CondOpProtoAndCheckerMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  CondOpProtoAndCheckerMaker(framework::OpProto* proto,
-                             framework::OpAttrChecker* op_checker)
+  CondOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Cond", "The condition, which is a bool vector");
     AddInput("Xs", "Inputs of Subnets").AsDuplicable();
...
@@ -74,8 +74,7 @@ class ConditionalBlockOp : public ConditionalOp {
 class ConditionalBlockOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ConditionalBlockOpProtoMaker(framework::OpProto *proto,
-                               framework::OpAttrChecker *op_checker)
+  ConditionalBlockOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
              "The conditional variable of this operator. If X is empty, the "
...
@@ -19,8 +19,7 @@ namespace operators {
 class CudnnConv2DOpMaker : public Conv2DOpMaker {
  public:
-  CudnnConv2DOpMaker(framework::OpProto* proto,
-                     framework::OpAttrChecker* op_checker)
+  CudnnConv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : Conv2DOpMaker(proto, op_checker) {
     AddAttr<int>("workspace_size_MB",
                  "workspace size for cudnn, in MB, "
@@ -34,8 +33,7 @@ class CudnnConv2DOpMaker : public Conv2DOpMaker {
 class CudnnConv3DOpMaker : public Conv3DOpMaker {
  public:
-  CudnnConv3DOpMaker(framework::OpProto* proto,
-                     framework::OpAttrChecker* op_checker)
+  CudnnConv3DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : Conv3DOpMaker(proto, op_checker) {
     AddAttr<int>("workspace_size_MB",
                  "workspace size for cudnn, in MB, "
...
@@ -66,8 +66,7 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
   ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
 }
-Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
-                             framework::OpAttrChecker* op_checker)
+Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
     : OpProtoAndCheckerMaker(proto, op_checker) {
   AddInput(
       "Input",
@@ -138,8 +137,7 @@ $$
 )DOC");
 }
-Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto,
-                             framework::OpAttrChecker* op_checker)
+Conv3DOpMaker::Conv3DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
     : OpProtoAndCheckerMaker(proto, op_checker) {
   AddInput(
       "Input",
...
@@ -50,14 +50,12 @@ inline bool IsExpand(std::vector<int64_t>& filter_dim,
 // operator implementations can reuse the code.
 class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  Conv2DOpMaker(framework::OpProto* proto,
-                framework::OpAttrChecker* op_checker);
+  Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker);
 };
 class Conv3DOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  Conv3DOpMaker(framework::OpProto* proto,
-                framework::OpAttrChecker* op_checker);
+  Conv3DOpMaker(OpProto* proto, OpAttrChecker* op_checker);
 };
 class ConvOp : public framework::OperatorWithKernel {
...
@@ -75,8 +75,7 @@ class ConvShiftGradOp : public framework::OperatorWithKernel {
 class ConvShiftOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ConvShiftOpMaker(framework::OpProto *proto,
-                   framework::OpAttrChecker *op_checker)
+  ConvShiftOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
              "(Tensor, default Tensor<float>), a 2-D tensor with shape B x M, "
...
@@ -19,8 +19,7 @@ namespace operators {
 class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker {
  public:
-  CudnnConv2DTransposeOpMaker(framework::OpProto* proto,
-                              framework::OpAttrChecker* op_checker)
+  CudnnConv2DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : Conv2DTransposeOpMaker(proto, op_checker) {
     AddAttr<int>("workspace_size_MB",
                  "workspace size for cudnn, in MB, "
@@ -34,8 +33,7 @@ class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker {
 class CudnnConv3DTransposeOpMaker : public Conv3DTransposeOpMaker {
  public:
-  CudnnConv3DTransposeOpMaker(framework::OpProto* proto,
-                              framework::OpAttrChecker* op_checker)
+  CudnnConv3DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : Conv3DTransposeOpMaker(proto, op_checker) {
     AddAttr<int>("workspace_size_MB",
                  "workspace size for cudnn, in MB, "
...
@@ -58,8 +58,8 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
   ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
 }
-Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
-    framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto,
+                                               OpAttrChecker* op_checker)
     : OpProtoAndCheckerMaker(proto, op_checker) {
   AddInput(
       "Input",
@@ -123,8 +123,8 @@ Example:
 )DOC");
 }
-Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(
-    framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(OpProto* proto,
+                                               OpAttrChecker* op_checker)
     : OpProtoAndCheckerMaker(proto, op_checker) {
   AddInput("Input",
            "(Tensor) The input tensor of convolution transpose operator."
...
@@ -30,14 +30,12 @@ using DDim = framework::DDim;
 // operator implementations can reuse the code.
 class Conv2DTransposeOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  Conv2DTransposeOpMaker(framework::OpProto* proto,
-                         framework::OpAttrChecker* op_checker);
+  Conv2DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker);
 };
 class Conv3DTransposeOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  Conv3DTransposeOpMaker(framework::OpProto* proto,
-                         framework::OpAttrChecker* op_checker);
+  Conv3DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker);
 };
 class ConvTransposeOp : public framework::OperatorWithKernel {
...
@@ -62,7 +62,7 @@ class CosSimOp : public framework::OperatorWithKernel {
 class CosSimOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  CosSimOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+  CosSimOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The 1st input of cos_sim op.");
     AddInput("Y", "The 2nd input of cos_sim op.");
...
@@ -18,8 +18,7 @@ namespace paddle {
 namespace operators {
 class CRFDecodingOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  CRFDecodingOpMaker(framework::OpProto* proto,
-                     framework::OpAttrChecker* op_checker)
+  CRFDecodingOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Emission",
              "(LoDTensor, default: LoDTensor<float>). A LoDTensor with shape "
...
...@@ -52,7 +52,7 @@ class CropOp : public framework::OperatorWithKernel { ...@@ -52,7 +52,7 @@ class CropOp : public framework::OperatorWithKernel {
class CropOpMaker : public framework::OpProtoAndCheckerMaker { class CropOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
CropOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) CropOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", AddInput("X",
"The input of pad op. " "The input of pad op. "
...@@ -88,7 +88,8 @@ There are two ways to set shape: ...@@ -88,7 +88,8 @@ There are two ways to set shape:
The input should be a k-D tensor(k > 0 and k < 7). As an example: The input should be a k-D tensor(k > 0 and k < 7). As an example:
Given: Case 1:
Given
X = [[0, 1, 2, 0, 0] X = [[0, 1, 2, 0, 0]
[0, 3, 4, 0, 0] [0, 3, 4, 0, 0]
...@@ -107,6 +108,27 @@ we get: ...@@ -107,6 +108,27 @@ we get:
Out = [[1, 2], Out = [[1, 2],
[3, 4]]. [3, 4]].
Case 2:
Given
X = [[0, 1, 2, 5, 0]
[0, 3, 4, 6, 0]
[0, 0, 0, 0, 0]],
and
offsets = [0, 1],
and
Y = [[0, 0, 0]
[0, 0, 0]],
we get:
Out = [[1, 2, 5],
[3, 4, 6]].
)DOC"); )DOC");
} }
}; };
......
...@@ -111,8 +111,7 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel { ...@@ -111,8 +111,7 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
CrossEntropyOpMaker(framework::OpProto* proto, CrossEntropyOpMaker(OpProto* proto, OpAttrChecker* op_checker)
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", AddInput("X",
"(Tensor, default Tensor<float>), a 2-D tensor with shape N x D, " "(Tensor, default Tensor<float>), a 2-D tensor with shape N x D, "
......
...@@ -55,8 +55,7 @@ class DecayedAdagradOp : public framework::OperatorWithKernel { ...@@ -55,8 +55,7 @@ class DecayedAdagradOp : public framework::OperatorWithKernel {
class DecayedAdagradOpMaker : public framework::OpProtoAndCheckerMaker { class DecayedAdagradOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
DecayedAdagradOpMaker(framework::OpProto *proto, DecayedAdagradOpMaker(OpProto *proto, OpAttrChecker *op_checker)
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Param", "(Tensor) Input parameter"); AddInput("Param", "(Tensor) Input parameter");
AddInput("Grad", "(Tensor) Input gradient"); AddInput("Grad", "(Tensor) Input gradient");
......
...@@ -40,8 +40,7 @@ class DropoutOp : public framework::OperatorWithKernel { ...@@ -40,8 +40,7 @@ class DropoutOp : public framework::OperatorWithKernel {
template <typename AttrType> template <typename AttrType>
class DropoutOpMaker : public framework::OpProtoAndCheckerMaker { class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
DropoutOpMaker(framework::OpProto* proto, DropoutOpMaker(OpProto* proto, OpAttrChecker* op_checker)
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input of dropout op."); AddInput("X", "The input of dropout op.");
AddOutput("Out", "The output of dropout op."); AddOutput("Out", "The output of dropout op.");
......
...@@ -71,7 +71,7 @@ class GPUDropoutKernel : public framework::OpKernel<T> { ...@@ -71,7 +71,7 @@ class GPUDropoutKernel : public framework::OpKernel<T> {
auto M = EigenMatrix<T>::Reshape(*mask, 1); auto M = EigenMatrix<T>::Reshape(*mask, 1);
Y.device(place) = X * M; Y.device(place) = X * M;
} else { } else {
Y.device(place) = X * dropout_prob; Y.device(place) = X * (1.0f - dropout_prob);
} }
} }
}; };
......
...@@ -57,7 +57,7 @@ class CPUDropoutKernel : public framework::OpKernel<T> { ...@@ -57,7 +57,7 @@ class CPUDropoutKernel : public framework::OpKernel<T> {
auto Y = EigenMatrix<T>::Reshape(*y, 1); auto Y = EigenMatrix<T>::Reshape(*y, 1);
auto& place = auto& place =
*context.template device_context<DeviceContext>().eigen_device(); *context.template device_context<DeviceContext>().eigen_device();
Y.device(place) = X * dropout_prob; Y.device(place) = X * (1.0f - dropout_prob);
} }
} }
}; };
......
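The two dropout kernel edits above are a semantic fix, not a refactor: with `is_test` set, the output is now scaled by the keep probability `1 - dropout_prob` instead of by the drop probability itself, which is the standard (non-inverted) dropout formulation. A minimal standalone sketch of that behavior, assuming plain vectors and a hypothetical `dropout_forward` helper rather than the Eigen-based kernels above:

```c++
#include <cstdio>
#include <cstdlib>
#include <vector>

// Sketch of the dropout semantics implied by the fix above; the names are
// illustrative, not Paddle APIs.
std::vector<float> dropout_forward(const std::vector<float>& x,
                                   float dropout_prob, bool is_test) {
  std::vector<float> y(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    if (is_test) {
      // Inference: every unit is kept, so scale by the keep probability
      // (1 - dropout_prob), matching X * (1.0f - dropout_prob) above.
      y[i] = x[i] * (1.0f - dropout_prob);
    } else {
      // Training: drop each unit independently with probability dropout_prob.
      float r = static_cast<float>(std::rand()) / RAND_MAX;
      y[i] = (r < dropout_prob) ? 0.0f : x[i];
    }
  }
  return y;
}

int main() {
  std::vector<float> x = {1.0f, 2.0f, 3.0f};
  auto y = dropout_forward(x, /*dropout_prob=*/0.5f, /*is_test=*/true);
  std::printf("%f %f %f\n", y[0], y[1], y[2]);  // 0.5 1.0 1.5
  return 0;
}
```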
@@ -19,8 +19,7 @@ namespace paddle {
 namespace operators {
 class ElementwiseAddOpMaker : public ElementwiseOpMaker {
  public:
-  ElementwiseAddOpMaker(framework::OpProto* proto,
-                        framework::OpAttrChecker* op_checker)
+  ElementwiseAddOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : ElementwiseOpMaker(proto, op_checker) {
     SetComment("Add", "$Out = X + Y$");
     AddComment(comment_);
...
@@ -19,8 +19,7 @@ namespace paddle {
 namespace operators {
 class ElementwiseDivOpMaker : public ElementwiseOpMaker {
  public:
-  ElementwiseDivOpMaker(framework::OpProto* proto,
-                        framework::OpAttrChecker* op_checker)
+  ElementwiseDivOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : ElementwiseOpMaker(proto, op_checker) {
     SetComment("Div", "$Out = X / Y$");
     AddComment(comment_);
...
@@ -20,8 +20,7 @@ namespace operators {
 class ElementwiseMulOpMaker : public ElementwiseOpMaker {
  public:
-  ElementwiseMulOpMaker(framework::OpProto* proto,
-                        framework::OpAttrChecker* op_checker)
+  ElementwiseMulOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : ElementwiseOpMaker(proto, op_checker) {
     SetComment("Mul", "$Out = X \\odot\\ Y$");
     AddComment(comment_);
...
@@ -43,8 +43,7 @@ class ElementwiseOp : public framework::OperatorWithKernel {
 class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ElementwiseOpMaker(framework::OpProto* proto,
-                     framework::OpAttrChecker* op_checker)
+  ElementwiseOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "(Tensor) The first input tensor of elementwise op");
     AddInput("Y", "(Tensor) The second input tensor of elementwise op");
...
@@ -103,10 +103,12 @@ class MidWiseTransformIterator<T, platform::CPUDeviceContext> {
   MidWiseTransformIterator<T, platform::CPUDeviceContext>& operator++() {
     ++j_;
-    i_ = j_ / post_;
-    if (UNLIKELY(i_ == n_)) {
-      j_ = 0;
-      i_ = 0;
+    if (UNLIKELY(j_ == post_)) {
+      ++i_;
+      j_ = 0;
+      if (UNLIKELY(i_ == n_)) {
+        i_ = 0;
+      }
     }
     return *this;
   }
@@ -125,10 +127,10 @@ class MidWiseTransformIterator<T, platform::CPUDeviceContext> {
  private:
   const T* ptr_;
-  int i_;
+  int64_t i_;
   int64_t j_;
   int64_t n_;
-  int post_;
+  int64_t post_;
 };
 #ifdef __NVCC__
...
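The `operator++` rewrite above replaces a per-element integer division (`i_ = j_ / post_`) with a wrap-around comparison, and widens the counters to `int64_t` so very large tensors cannot overflow them. A small self-contained sketch of the wrapping-counter logic, using a hypothetical `MidWiseCounter` rather than the templated iterator:

```c++
#include <cassert>
#include <cstdint>

// Wrapping counter equivalent to the rewritten operator++ above: i_ walks the
// broadcast (middle) dimension of size n_, j_ counts post_ inner elements.
struct MidWiseCounter {
  int64_t i_ = 0;
  int64_t j_ = 0;
  int64_t n_;
  int64_t post_;
  MidWiseCounter(int64_t n, int64_t post) : n_(n), post_(post) {}
  void advance() {
    ++j_;
    if (j_ == post_) {  // finished one inner run; step the middle index
      ++i_;
      j_ = 0;
      if (i_ == n_) {  // wrap around the middle dimension
        i_ = 0;
      }
    }
  }
};

int main() {
  MidWiseCounter c(/*n=*/3, /*post=*/4);
  for (int step = 0; step < 3 * 4; ++step) {
    c.advance();
  }
  assert(c.i_ == 0 && c.j_ == 0);  // after n * post steps, both indices wrap
  return 0;
}
```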
@@ -19,8 +19,7 @@ namespace paddle {
 namespace operators {
 class ElementwiseSubOpMaker : public ElementwiseOpMaker {
  public:
-  ElementwiseSubOpMaker(framework::OpProto* proto,
-                        framework::OpAttrChecker* op_checker)
+  ElementwiseSubOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : ElementwiseOpMaker(proto, op_checker) {
     SetComment("Sub", "$Out = X - Y$");
     AddComment(comment_);
...
@@ -55,7 +55,7 @@ class ExpandOp : public framework::OperatorWithKernel {
 class ExpandOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ExpandOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+  ExpandOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
              "(Tensor, default Tensor<float>) A tensor with rank in [1, 6]."
...
@@ -54,8 +54,7 @@ class FeedOp : public framework::OperatorBase {
 class FeedOpInfoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  FeedOpInfoMaker(framework::OpProto *proto,
-                  framework::OpAttrChecker *op_checker)
+  FeedOpInfoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The input of feed op");
     AddOutput("Out", "The output of feed op");
...
@@ -61,8 +61,7 @@ class FetchOp : public framework::OperatorBase {
 class FetchOpInfoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  FetchOpInfoMaker(framework::OpProto *proto,
-                   framework::OpAttrChecker *op_checker)
+  FetchOpInfoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The input of fetch op");
     AddOutput("Out", "The output of fetch op");
...
@@ -52,7 +52,7 @@ class FillConstantBatchSizeLikeOp : public framework::OperatorWithKernel {
   framework::OpKernelType GetKernelType(
       const framework::ExecutionContext &ctx) const override {
     return framework::OpKernelType(
-        static_cast<framework::DataType>(ctx.Attr<int>("dtype")),
+        static_cast<framework::proto::DataType>(ctx.Attr<int>("dtype")),
         ctx.device_context());
   }
 };
@@ -60,13 +60,12 @@ class FillConstantBatchSizeLikeOp : public framework::OperatorWithKernel {
 class FillConstantBatchSizeLikeOpMaker
     : public framework::OpProtoAndCheckerMaker {
  public:
-  FillConstantBatchSizeLikeOpMaker(framework::OpProto *proto,
-                                   framework::OpAttrChecker *op_checker)
+  FillConstantBatchSizeLikeOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddAttr<int>("dtype",
                  "(int, default 5 (FP32)) "
                  "Output data type")
-        .SetDefault(framework::DataType::FP32);
+        .SetDefault(framework::proto::DataType::FP32);
     AddInput("Input",
              "(Tensor) Tensor "
              "whose dim_idx th dimension is used to specify the batch_size");
...
@@ -34,7 +34,8 @@ class FillConstantOp : public framework::OperatorBase {
   using framework::OperatorBase::OperatorBase;
   void Run(const framework::Scope &scope,
            const platform::DeviceContext &dev_ctx) const override {
-    auto data_type = static_cast<framework::DataType>(Attr<int>("dtype"));
+    auto data_type =
+        static_cast<framework::proto::DataType>(Attr<int>("dtype"));
     auto value = Attr<float>("value");
     auto force_cpu = Attr<bool>("force_cpu");
     auto &out =
@@ -52,13 +53,12 @@ class FillConstantOp : public framework::OperatorBase {
 class FillConstantOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  FillConstantOpMaker(framework::OpProto *proto,
-                      framework::OpAttrChecker *op_checker)
+  FillConstantOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddAttr<int>("dtype",
                  "(int, default 5 (FP32)) "
                  "Output data type")
-        .SetDefault(framework::DataType::FP32);
+        .SetDefault(framework::proto::DataType::FP32);
     AddAttr<std::vector<int>>("shape", "(vector<int>) The shape of the output");
     AddAttr<float>("value", "(float, default 0) The value to be filled")
         .SetDefault(0.0f);
...
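The repeated `framework::DataType` to `framework::proto::DataType` substitutions in these hunks track the enum's move into the protobuf-generated `proto` namespace; the `dtype` attribute itself stays a plain `int` and is cast back to the enum wherever a kernel or default is chosen. A minimal sketch of that round-trip, with an illustrative stand-in enum (the value `FP32 = 5` matches the "(int, default 5 (FP32))" wording in the attribute docs above):

```c++
#include <iostream>

// Illustrative stand-in for the generated enum; not the Paddle header.
namespace proto {
enum DataType { BOOL = 0, INT16 = 1, INT32 = 2, INT64 = 3, FP16 = 4, FP32 = 5, FP64 = 6 };
}  // namespace proto

int main() {
  // Operator attributes store the dtype as a plain int ...
  int dtype_attr = proto::FP32;
  // ... and kernel selection casts it back to the enum, as in
  // static_cast<framework::proto::DataType>(ctx.Attr<int>("dtype")) above.
  auto data_type = static_cast<proto::DataType>(dtype_attr);
  std::cout << "dtype attribute " << dtype_attr << " selects enum value "
            << data_type << std::endl;
  return 0;
}
```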
@@ -48,7 +48,7 @@ class FillOp : public framework::OperatorBase {
                            "Cannot find variable %s", Output("Out"))
                .GetMutable<framework::LoDTensor>());
     out.Resize(framework::make_ddim(Attr<std::vector<int>>("shape")));
-    auto dtype = static_cast<framework::DataType>(Attr<int>("dtype"));
+    auto dtype = static_cast<framework::proto::DataType>(Attr<int>("dtype"));
     platform::CPUPlace cpu;
     auto force_cpu = Attr<bool>("force_cpu");
     out.mutable_data(force_cpu ? cpu : dev_ctx.GetPlace(),
@@ -76,7 +76,7 @@ class FillOp : public framework::OperatorBase {
 class FillOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  FillOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+  FillOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddComment(R"DOC(Fill operator
@@ -88,7 +88,7 @@ Fill an tensor with `value` and `shape`. The type of the tensor is specify by
         "value", "The float values of tensor, which are flatten in row major");
     AddAttr<std::vector<int>>("shape", "The shape of output tensor");
     AddAttr<int>("dtype", "The data type of output tensor, Default is float")
-        .SetDefault(framework::DataType::FP32);
+        .SetDefault(framework::proto::DataType::FP32);
     AddAttr<bool>("force_cpu",
                   "Whether the output tensor must be at CPU memory or not. "
                   "Default is false.")
...
@@ -33,8 +33,7 @@ class FillZerosLikeOp : public framework::OperatorWithKernel {
 class FillZerosLikeOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  FillZerosLikeOpMaker(framework::OpProto *proto,
-                       framework::OpAttrChecker *op_checker)
+  FillZerosLikeOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The input of fill-zeros-like op.");
     AddOutput("Y", "The variable will be filled up with zeros.");
...
@@ -57,7 +57,7 @@ class FTRLOp : public framework::OperatorWithKernel {
 class FTRLOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  FTRLOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+  FTRLOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Param",
              "(Tensor, default Tensor<float>) "
...
@@ -67,7 +67,7 @@ class GatherGradOp : public framework::OperatorWithKernel {
 class GatherOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  GatherOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+  GatherOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The source input of gather op");
     AddInput("Index", "The index input of gather op");
...
@@ -60,15 +60,14 @@ class GaussianRandomOp : public framework::OperatorWithKernel {
   framework::OpKernelType GetKernelType(
       const framework::ExecutionContext& ctx) const override {
     return framework::OpKernelType(
-        static_cast<framework::DataType>(ctx.Attr<int>("dtype")),
+        static_cast<framework::proto::DataType>(ctx.Attr<int>("dtype")),
         ctx.device_context());
   }
 };
 class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  GaussianRandomOpMaker(framework::OpProto* proto,
-                        framework::OpAttrChecker* op_checker)
+  GaussianRandomOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddOutput("Out", "Output matrix of gaussian random op");
@@ -91,7 +90,7 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
     AddAttr<int>("dtype",
                  "(int, default 5(FP32)) "
                  "Output data type.")
-        .SetDefault(framework::DataType::FP32);
+        .SetDefault(framework::proto::DataType::FP32);
     AddComment(R"DOC(
 GaussianRandom Operator.
...
@@ -67,7 +67,7 @@ class GRUOp : public framework::OperatorWithKernel {
 class GRUOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  GRUOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+  GRUOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Input",
              "(LoDTensor) The first input is a LodTensor, which supports "
...
@@ -71,8 +71,7 @@ class GRUUnitOp : public framework::OperatorWithKernel {
 class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  GRUUnitOpMaker(framework::OpProto* proto,
-                 framework::OpAttrChecker* op_checker)
+  GRUUnitOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Input",
              "(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
...
@@ -46,8 +46,7 @@ class HingeLossOp : public framework::OperatorWithKernel {
 template <typename AttrType>
 class HingeLossOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  HingeLossOpMaker(framework::OpProto* proto,
-                   framework::OpAttrChecker* op_checker)
+  HingeLossOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Logits",
              "The input value (Logits) of Hinge loss op."
...
@@ -45,8 +45,7 @@ class HuberLossOp : public framework::OperatorWithKernel {
 template <typename AttrType>
 class HuberLossOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  HuberLossOpMaker(framework::OpProto* proto,
-                   framework::OpAttrChecker* op_checker)
+  HuberLossOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
              "The input value of huber loss op."
...
@@ -70,8 +70,7 @@ class IncrementOp : public framework::OperatorBase {
 class IncrementOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  IncrementOpMaker(framework::OpProto *proto,
-                   framework::OpAttrChecker *op_checker)
+  IncrementOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "(Tensor) The input tensor of increment operator");
     AddOutput("Out", "(Tensor) The output tensor of increment operator.");
...
@@ -47,8 +47,7 @@ class IsEmptyOp : public framework::OperatorBase {
 class IsEmptyOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  IsEmptyOpProtoMaker(framework::OpProto *proto,
-                      framework::OpAttrChecker *op_checker)
+  IsEmptyOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput(kInput, "(Tensor) Tensor which is to be checked.");
     AddOutput(kOutput, "(Tensor) a boolean Tensor that indicate empty or not.");
...
@@ -48,7 +48,7 @@ class L1NormGradOp : public framework::OperatorWithKernel {
 class L1NormOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  L1NormOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+  L1NormOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : framework::OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "(Tensor) The input of l1_norm op.");
     AddOutput("Out", "(Scalar) The output of l1_norm op.");
...
@@ -19,8 +19,7 @@ namespace operators {
 class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  LinearChainCRFOpMaker(framework::OpProto* proto,
-                        framework::OpAttrChecker* op_checker)
+  LinearChainCRFOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Emission",
              "(LoDTensor, default LoDTensor<float>) "
...
@@ -58,8 +58,7 @@ class LoadOp : public framework::OperatorBase {
 class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  LoadOpProtoMaker(framework::OpProto *proto,
-                   framework::OpAttrChecker *op_checker)
+  LoadOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddOutput("Out", "(Tensor) The tensor need to be loaded");
     AddAttr<std::string>("file_path",
...
@@ -38,8 +38,7 @@ class LoDArrayLengthOp : public framework::OperatorBase {
 class LoDArrayLengthProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  LoDArrayLengthProtoMaker(framework::OpProto *proto,
-                           framework::OpAttrChecker *op_checker)
+  LoDArrayLengthProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "(LoDTensorArray) The input tensor array.");
     AddOutput("Out", "(Tensor) 1x1 CPU Tensor of length, int64_t");
...
@@ -30,13 +30,13 @@ class LoDRankTableOp : public framework::OperatorBase {
         scope.FindVar(Output("Out"))->GetMutable<framework::LoDRankTable>();
     VLOG(10) << "Level = " << static_cast<size_t>(Attr<int>("level"));
     out->Reset(x.lod(), static_cast<size_t>(Attr<int>("level")));
+    VLOG(10) << Input("X") << "'s lod information is " << *out;
   }
 };
 class LoDRankTableOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  LoDRankTableOpProtoMaker(framework::OpProto *proto,
-                           framework::OpAttrChecker *op_checker)
+  LoDRankTableOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
              "(LoDTensor) input lod tensor, must contain lod information.");
@@ -67,7 +67,7 @@ class LoDRankTableInferVarType : public framework::VarTypeInference {
                   framework::BlockDescBind *block) const override {
     for (auto &o : op_desc.Output("Out")) {
       block->FindRecursiveOrCreateVar(o)->SetType(
-          framework::VarDesc::LOD_RANK_TABLE);
+          framework::proto::VarDesc::LOD_RANK_TABLE);
     }
   }
 };
...
@@ -48,8 +48,7 @@ class LoDResetOp : public framework::OperatorWithKernel {
 class LoDResetOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  LoDResetOpMaker(framework::OpProto *proto,
-                  framework::OpAttrChecker *op_checker)
+  LoDResetOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "(LoDTensor) The input tensor of lod_reset operator.");
     AddInput("TargetLoD",
...
@@ -97,8 +97,7 @@ class LoDTensorToArrayOp : public framework::OperatorBase {
 class LoDTensorToArrayOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  LoDTensorToArrayOpProtoMaker(framework::OpProto *proto,
-                               framework::OpAttrChecker *op_checker)
+  LoDTensorToArrayOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "");
     AddInput("RankTable", "");
@@ -131,7 +130,7 @@ class LoDTensorToArrayInferVarType : public framework::VarTypeInference {
   void operator()(const framework::OpDescBind &op_desc,
                   framework::BlockDescBind *block) const override {
     for (auto &out_var : op_desc.Output("Out")) {
-      block->Var(out_var)->SetType(framework::VarDesc::LOD_TENSOR_ARRAY);
+      block->Var(out_var)->SetType(framework::proto::VarDesc::LOD_TENSOR_ARRAY);
     }
   }
 };
...
@@ -46,8 +46,7 @@ class LogLossOp : public framework::OperatorWithKernel {
 template <typename AttrType>
 class LogLossOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  LogLossOpMaker(framework::OpProto* proto,
-                 framework::OpAttrChecker* op_checker)
+  LogLossOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Predicted",
              "The input value (Predicted) of Log loss op."
...
@@ -20,8 +20,7 @@ namespace operators {
 template <typename OpComment>
 class BinaryLogicalOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  BinaryLogicalOpProtoMaker(framework::OpProto *proto,
-                            framework::OpAttrChecker *op_checker)
+  BinaryLogicalOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     OpComment comment;
     AddInput("X",
@@ -45,8 +44,7 @@ Each element of Out is calculated by %s
 template <typename OpComment>
 class UnaryLogicalOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  UnaryLogicalOpProtoMaker(framework::OpProto *proto,
-                           framework::OpAttrChecker *op_checker)
+  UnaryLogicalOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     OpComment comment;
     AddInput("X", string::Sprintf("(LoDTensor) Operand of %s operator",
...
@@ -51,8 +51,7 @@ class LookupTableOp : public framework::OperatorWithKernel {
 class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  LookupTableOpMaker(framework::OpProto* proto,
-                     framework::OpAttrChecker* op_checker)
+  LookupTableOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("W",
              "An input represents embedding tensors, "
@@ -117,11 +116,12 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference {
     if (is_sparse) {
       VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W")
               << " is set to SelectedRows";
-      block->Var(out_var_name)->SetType(framework::VarDesc::SELECTED_ROWS);
+      block->Var(out_var_name)
+          ->SetType(framework::proto::VarDesc::SELECTED_ROWS);
     } else {
       VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W")
               << " is set to LoDTensor";
-      block->Var(out_var_name)->SetType(framework::VarDesc::LOD_TENSOR);
+      block->Var(out_var_name)->SetType(framework::proto::VarDesc::LOD_TENSOR);
     }
   }
 };
...
@@ -140,7 +140,7 @@ class LRNOp : public framework::OperatorWithKernel {
 template <typename T>
 class LRNOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  LRNOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+  LRNOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
              "(Tensor) The input of LRN operator. "
...
@@ -102,7 +102,7 @@ class LSTMOp : public framework::OperatorWithKernel {
 class LSTMOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  LSTMOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+  LSTMOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Input",
              "(LoDTensor) the first input is a LodTensor, which support "
...
@@ -48,10 +48,12 @@ class LstmUnitOp : public framework::OperatorWithKernel {
 class LstmUnitOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  LstmUnitOpMaker(framework::OpProto* proto,
-                  framework::OpAttrChecker* op_checker)
+  LstmUnitOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "FC input before the non-linear activation.");
+    AddInput("X",
+             "Lstm unit only applies non-linear activations, please make sure"
+             "that linear tranformation has already been applied to `X`. "
+             "Linear tranformation can be applied by adding a `fc` layer");
     AddInput(
         "C_prev",
         "The cell state tensor of last time-step in the Lstm Unit operator.");
...
@@ -42,8 +42,7 @@ class MarginRankLossOp : public framework::OperatorWithKernel {
 template <typename T>
 class MarginRankLossOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  MarginRankLossOpMaker(framework::OpProto *proto,
-                        framework::OpAttrChecker *op_checker)
+  MarginRankLossOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X1",
              "(2-D tensor with shape [batch_size x 1]) The score for "
...
@@ -61,14 +61,13 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
     const T* im_data = im.data<T>();
     T* col_data = col->data<T>();
     for (int c = 0; c < channels_col; ++c) {
       int w_offset = c % filter_width;
       int h_offset = (c / filter_width) % filter_height;
-      int c_im = c / filter_width / filter_height;
+      int c_im = c / (filter_width * filter_height);
       for (int h = 0; h < col_height; ++h) {
+        int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
         for (int w = 0; w < col_width; ++w) {
-          int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
           int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1];
           int col_idx = (c * col_height + h) * col_width + w;
           int im_idx = (im_row_idx + c_im * im_height) * im_width + im_col_idx;
@@ -130,16 +129,14 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
     for (int c = 0; c < channels_col; ++c) {
       int w_offset = c % filter_width;
       int h_offset = (c / filter_width) % filter_height;
-      int c_im = c / filter_width / filter_height;
+      int c_im = c / (filter_width * filter_height);
       for (int h = 0; h < col_height; ++h) {
+        int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
         for (int w = 0; w < col_width; ++w) {
-          int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
           int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1];
           if ((im_row_idx) >= 0 && (im_row_idx) < im_height &&
               (im_col_idx) >= 0 && (im_col_idx) < im_width) {
-            im_row_idx += c_im * im_height;
-            im_data[im_row_idx * im_width + im_col_idx] +=
+            im_data[(im_row_idx + c_im * im_height) * im_width + im_col_idx] +=
                 col_data[(c * col_height + h) * col_width + w];
           }
         }
@@ -199,12 +196,13 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
       for (int channel = 0; channel < im_channels; ++channel) {
         for (int filter_row_idx = 0; filter_row_idx < filter_height;
              ++filter_row_idx) {
+          int im_row_offset =
+              col_row_idx * stride[0] + filter_row_idx - padding[0];
           for (int filter_col_idx = 0; filter_col_idx < filter_width;
                ++filter_col_idx) {
-            int im_row_offset =
-                col_row_idx * stride[0] + filter_row_idx - padding[0];
             int im_col_offset =
                 col_col_idx * stride[1] + filter_col_idx - padding[1];
+
             int col_offset =
                 ((((col_row_idx)*col_width + col_col_idx) * im_channels +
                   channel) *
@@ -271,12 +269,13 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
       for (int channel = 0; channel < im_channels; ++channel) {
         for (int filter_row_idx = 0; filter_row_idx < filter_height;
              ++filter_row_idx) {
+          int im_row_offset =
+              col_row_idx * stride[0] + filter_row_idx - padding[0];
           for (int filter_col_idx = 0; filter_col_idx < filter_width;
                ++filter_col_idx) {
-            int im_row_offset =
-                col_row_idx * stride[0] + filter_row_idx - padding[0];
             int im_col_offset =
                 col_col_idx * stride[1] + filter_col_idx - padding[1];
+
             int col_offset =
                 (((col_row_idx * col_width + col_col_idx) * im_channels +
                   channel) *
@@ -284,6 +283,7 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
                  filter_row_idx) *
                     filter_width +
                 filter_col_idx;
+
             if (im_row_offset >= 0 && im_row_offset < im_height &&
                 im_col_offset >= 0 && im_col_offset < im_width) {
               int im_offset =
...
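Both im2col/col2im rewrites above make the same two behavior-preserving changes: the channel index is computed with a single division, since `c / filter_width / filter_height` equals `c / (filter_width * filter_height)` for non-negative operands, and the row offset, which does not depend on the innermost column loop, is hoisted out of it (in col2im this also required dropping the in-place `im_row_idx += c_im * im_height`, which would otherwise corrupt the now loop-invariant value). A quick standalone check of the division identity:

```c++
#include <cassert>

int main() {
  const int filter_width = 3;
  const int filter_height = 5;
  // floor(floor(c / a) / b) == floor(c / (a * b)) for non-negative ints,
  // which is why collapsing the two divisions above preserves behavior.
  for (int c = 0; c < 10000; ++c) {
    assert(c / filter_width / filter_height ==
           c / (filter_width * filter_height));
  }
  return 0;
}
```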
@@ -277,6 +277,14 @@ void set_constant_with_place<platform::CPUPlace>(
                            TensorSetConstantCPU(tensor, value));
 }

+template <>
+void set_constant_with_place<platform::MKLDNNPlace>(
+    const platform::DeviceContext& context, framework::Tensor* tensor,
+    float value) {
+  framework::VisitDataType(framework::ToDataType(tensor->type()),
+                           TensorSetConstantCPU(tensor, value));
+}
+
 struct TensorSetConstantWithPlace : public boost::static_visitor<void> {
   TensorSetConstantWithPlace(const platform::DeviceContext& context,
                              framework::Tensor* tensor, float value)
...
@@ -274,7 +274,7 @@ void set_constant_with_place<platform::GPUPlace>(
 }

 template <>
-void set_constant_with_place<platform::CudnnPlace>(
+void set_constant_with_place<platform::CUDNNPlace>(
     const platform::DeviceContext& context, framework::Tensor* tensor,
     float value) {
   set_constant_with_place<platform::GPUPlace>(context, tensor, value);
...
@@ -130,7 +130,7 @@ class MatMulOp : public framework::OperatorWithKernel {
 class MatMulOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  MatMulOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+  MatMulOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The first input of MatMul op");
     AddInput("Y", "The second input of MatMul op");
...
@@ -40,8 +40,7 @@ class MaxSeqenceLenOp : public framework::OperatorBase {
 class MaxSeqenceLenOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  MaxSeqenceLenOpProtoMaker(framework::OpProto *proto,
-                            framework::OpAttrChecker *op_checker)
+  MaxSeqenceLenOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("RankTable", "The lod_rank_table.");
     AddOutput("Out", "The max sequence length.");
...
@@ -20,7 +20,7 @@ using framework::Tensor;
 class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  MaxOutOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+  MaxOutOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput(
         "X",
...
@@ -32,7 +32,7 @@ class MeanOp : public framework::OperatorWithKernel {
 class MeanOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  MeanOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+  MeanOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The input of mean op");
     AddOutput("Out", "The output of mean op");
...
@@ -114,8 +114,7 @@ class MergeLoDTensorOp : public framework::OperatorBase {
 class MergeLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  MergeLoDTensorOpProtoMaker(framework::OpProto *proto,
-                             framework::OpAttrChecker *op_checker)
+  MergeLoDTensorOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
              "The input LoDTensor, contains complete lod information to "
...
@@ -46,7 +46,7 @@ class MinusOp : public framework::OperatorWithKernel {
 class MinusOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  MinusOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+  MinusOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The left tensor of minus operator.");
     AddInput("Y", "The right tensor of minus operator.");
...
@@ -39,8 +39,7 @@ class ModifiedHuberLossOp : public framework::OperatorWithKernel {
 class ModifiedHuberLossOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  ModifiedHuberLossOpMaker(framework::OpProto* proto,
-                           framework::OpAttrChecker* op_checker)
+  ModifiedHuberLossOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X",
              "The input tensor of modified huber loss op. "
...
@@ -54,8 +54,7 @@ class MomentumOp : public framework::OperatorWithKernel {
 class MomentumOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  MomentumOpMaker(framework::OpProto *proto,
-                  framework::OpAttrChecker *op_checker)
+  MomentumOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Param",
              "(Tensor, default Tensor<float>) "
...
@@ -71,7 +71,7 @@ class MulOpShapeInference : public framework::InferShapeBase {
 class MulOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  MulOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+  MulOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The first input of mul op");
     AddInput("Y", "The second input of mul op");
...
@@ -61,8 +61,7 @@ class MultiplexOp : public framework::OperatorWithKernel {
 class MultiplexOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  MultiplexOpMaker(framework::OpProto* proto,
-                   framework::OpAttrChecker* op_checker)
+  MultiplexOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Ids", "The index tensor of multiplex operator.");
     AddInput("X", "The candidate tensors of multiplex operator.")
...
@@ -43,8 +43,7 @@ class NCCLInitOp : public framework::OperatorBase {
 class NCCLInitOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  NCCLInitOpMaker(framework::OpProto *proto,
-                  framework::OpAttrChecker *op_checker)
+  NCCLInitOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddOutput("Communicator",
               "Create Communicator for communicating between gpus");
@@ -52,7 +51,7 @@ class NCCLInitOpMaker : public framework::OpProtoAndCheckerMaker {
     AddAttr<int>("dtype",
                  "(int, default 5 (FP32)) "
                  "Output data type")
-        .SetDefault(framework::DataType::FP32);
+        .SetDefault(framework::proto::DataType::FP32);
     AddComment(R"DOC(
 NCCLInit Operator.
@@ -141,8 +140,7 @@ class NCCLBcastOp : public framework::OperatorWithKernel {
 // AllreduceOp
 class NCCLAllReduceOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  NCCLAllReduceOpMaker(framework::OpProto *proto,
-                       framework::OpAttrChecker *op_checker)
+  NCCLAllReduceOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The input of AllReduce op");
     AddInput("Communicator", "Communicator for communicating between gpus");
@@ -163,8 +161,7 @@ AllReduce the input tensors.
 // ReduceOp
 class NCCLReduceOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  NCCLReduceOpMaker(framework::OpProto *proto,
-                    framework::OpAttrChecker *op_checker)
+  NCCLReduceOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The input of Reduce op");
     AddInput("Communicator", "Communicator for communicating between gpus");
@@ -190,8 +187,7 @@ Reduce the tensors.
 // BcastOp
 class NCCLBcastOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  NCCLBcastOpMaker(framework::OpProto *proto,
-                   framework::OpAttrChecker *op_checker)
+  NCCLBcastOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The input of BcastSend op");
     AddInput("Communicator", "Communicator for communicating between gpus");
...
@@ -73,7 +73,7 @@ class NCEOp : public framework::OperatorWithKernel {
 class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  NCEOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+  NCEOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("Input", "(Tensor) A tensor of shape [batch_size, dim].");
     AddInput(
...
@@ -35,8 +35,8 @@ Here we give some examples to show how these rules will be used.
 ```c++
 class AccumulateOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  AccumulateOpMaker(framework::OpProto *proto,
-                    framework::OpAttrChecker *op_checker)
+  AccumulateOpMaker(OpProto *proto,
+                    OpAttrChecker *op_checker)
     : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "(Tensor) The input tensor that has to be accumulated to the output tensor.
       If the output size is not the same as input size,
...
# Standard Markdown Format for Operators
The following should be the standard format for documentation for all the operators that will get rendered in the `html`:
```
Operator Name (In PaddlePaddle)
Operator Name (Standard)
Operator description.
LaTeX equation of how the operator performs an update.
The signature of the operator.
```
Each of the sections mentioned above is covered in further detail in the rest of this document.
# PaddlePaddle Operator Name
This should be all lowercase; when the name has multiple words, we separate them with an underscore. For example:
`array to lod tensor` should be written as `array_to_lod_tensor`.
This naming convention should be standard across all PaddlePaddle operators.
# Standard Operator Name
This is the standard name of the operator as used in the community. The general standard is usually:
- Standard abbreviations like `SGD` are written in all capital letters.
- Operator names made up of multiple words use `camelCase` (capitalize the first letter of each word after the first).
- Keep numbers inside a word as is, with no boundary delimiters.
- Follow the name of the operator with the keyword: `Activation Operator.`
# Operator description
This section should describe what the operator does, including the operation performed and the literature in which it was first introduced, along with other important details. The relevant paper or article, including a hyperlink, should be cited in this section.
# LaTeX equation
This section should contain an overall equation of the update or operation that the operator performs. The variables used in the equation should follow the naming convention of operators as described [here](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/name_convention.md). A multi-word variable name should have its words separated by an underscore (`_`).
# The signature
This section describes the signature of the operator: a list of Inputs and Outputs, each with a brief description of what the variable represents and its type. The variable names follow the `CamelCase` naming convention. The proposed format for this is:
`Section :
VariableName : (VariableType) VariableDescription
...
...
`
The following example for an `sgd` operator covers the above sections as they would ideally look in the `html`:
```
sgd
SGD operator
This operator implements one step of the stochastic gradient descent algorithm.
param_out = param - learning_rate * grad
Inputs:
Param : (Tensor) Input parameter
LearningRate : (Tensor) Learning rate of SGD
Grad : (Tensor) Input gradient
Outputs:
ParamOut : (Tensor) Output parameter
```
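For reference, here is a minimal sketch of how these four sections might surface in an operator's `OpProtoAndCheckerMaker`. The input/output names match the `SGDOpMaker` that appears later in this diff; the exact wiring shown is illustrative, not the verbatim source:
```c++
class SGDOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SGDOpMaker(OpProto* proto, OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    // The signature: CamelCase names, each with a "(Type) description" string.
    AddInput("Param", "(Tensor) Input parameter");
    AddInput("LearningRate", "(Tensor) Learning rate of SGD");
    AddInput("Grad", "(Tensor) Input gradient");
    AddOutput("ParamOut", "(Tensor) Output parameter");
    // Standard name, description, and LaTeX equation go into the doc string.
    AddComment(R"DOC(
SGD operator

This operator implements one step of the stochastic gradient descent algorithm.

$$param\_out = param - learning\_rate * grad$$

)DOC");
  }
};
```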
...@@ -48,7 +48,7 @@ class PadOp : public framework::OperatorWithKernel {
class PadOpMaker : public framework::OpProtoAndCheckerMaker {
public:
PadOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"The input of pad op. "
...
...@@ -67,8 +67,7 @@ void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const {
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
}
Pool2dOpMaker::Pool2dOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput(
"X",
...@@ -136,8 +135,7 @@ Example:
)DOC");
}
Pool3dOpMaker::Pool3dOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(Tensor) The input tensor of pooling operator. "
...
...@@ -40,14 +40,12 @@ class PoolOpGrad : public framework::OperatorWithKernel {
class Pool2dOpMaker : public framework::OpProtoAndCheckerMaker {
public:
Pool2dOpMaker(OpProto* proto, OpAttrChecker* op_checker);
};
class Pool3dOpMaker : public framework::OpProtoAndCheckerMaker {
public:
Pool3dOpMaker(OpProto* proto, OpAttrChecker* op_checker);
};
template <typename DeviceContext, typename T>
...
...@@ -100,8 +100,7 @@ class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel {
class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
public:
MaxPool2dWithIndexOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput(
"X",
...@@ -178,8 +177,7 @@ Example:
class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
public:
MaxPool3dWithIndexOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(Tensor) The input tensor of pooling operator. "
...
...@@ -95,8 +95,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel {
class PositiveNegativePairOpMaker : public framework::OpProtoAndCheckerMaker {
public:
PositiveNegativePairOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Score",
"(Tensor, float) Model Score on an item (with "
...
...@@ -90,8 +90,7 @@ class PrecisionRecallOp : public framework::OperatorWithKernel {
class PrecisionRecallOpMaker : public framework::OpProtoAndCheckerMaker {
public:
PrecisionRecallOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("MaxProbs",
"(Tensor, default Tensor<float>) A 2-D tensor with shape N x 1, "
...
...@@ -38,7 +38,7 @@ class PReluOp : public framework::OperatorWithKernel {
class PReluOpMaker : public framework::OpProtoAndCheckerMaker {
public:
PReluOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input tensor of prelu operator.");
AddInput("Alpha", "The alpha weight of prelu operator.");
...
...@@ -59,8 +59,7 @@ class ProximalAdagradOp : public framework::OperatorWithKernel {
class ProximalAdagradOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ProximalAdagradOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Param",
"(Tensor, default Tensor<float>) "
...
...@@ -47,8 +47,7 @@ class ProximalGDOp : public framework::OperatorWithKernel {
class ProximalGDOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ProximalGDOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Param",
"(Tensor, default Tensor<float>) "
...
...@@ -45,8 +45,7 @@ class RankLossOp : public framework::OperatorWithKernel {
class RankLossOpMaker : public framework::OpProtoAndCheckerMaker {
public:
RankLossOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Label",
"(2-D Tensor with shape [batch_size x 1]) "
...
...@@ -497,8 +497,7 @@ class RecurrentGradOp : public RecurrentBase {
class RecurrentOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
RecurrentOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput(kInputs, "rnn inputs").AsDuplicable();
AddInput(kInitialStates, "rnn initial states").AsDuplicable();
...
...@@ -97,7 +97,7 @@ class RecvOp : public framework::OperatorBase {
class RecvOpMaker : public framework::OpProtoAndCheckerMaker {
public:
RecvOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("RX", "(Tensor) Input tensor to be saved");
AddComment(R"DOC(
...
...@@ -83,7 +83,7 @@ class ReduceGradOp : public framework::OperatorWithKernel {
class ReduceOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ReduceOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(Tensor) The input tensor. Tensors with rank at most 6 are "
...@@ -135,8 +135,7 @@ If reduce_all is true, just reduce along all dimensions and output a scalar.
class ReduceSumOpMaker : public ReduceOpMaker {
public:
ReduceSumOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: ReduceOpMaker(proto, op_checker) {
SetComment("ReduceSum", "sum");
AddComment(comment_);
...@@ -145,8 +144,7 @@ class ReduceSumOpMaker : public ReduceOpMaker {
class ReduceMeanOpMaker : public ReduceOpMaker {
public:
ReduceMeanOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: ReduceOpMaker(proto, op_checker) {
SetComment("ReduceMean", "mean");
AddComment(comment_);
...@@ -155,8 +153,7 @@ class ReduceMeanOpMaker : public ReduceOpMaker {
class ReduceMaxOpMaker : public ReduceOpMaker {
public:
ReduceMaxOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: ReduceOpMaker(proto, op_checker) {
SetComment("ReduceMax", "max");
AddComment(comment_);
...@@ -165,8 +162,7 @@ class ReduceMaxOpMaker : public ReduceOpMaker {
class ReduceMinOpMaker : public ReduceOpMaker {
public:
ReduceMinOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: ReduceOpMaker(proto, op_checker) {
SetComment("ReduceMin", "min");
AddComment(comment_);
...
...@@ -34,21 +34,33 @@ class ReshapeOp : public framework::OperatorWithKernel {
auto shape = ctx->Attrs().Get<std::vector<int>>("shape");
PADDLE_ENFORCE(shape.size() > 0, "Attr(shape) shouldn't be empty.");
auto x_dims = ctx->GetInputDim("X");
std::vector<size_t> neg_dims_idx;
// set some dimension to -1 if it is unknown
const int unknown_size = -1;
for (size_t i = 0; i < shape.size(); ++i) {
PADDLE_ENFORCE(shape[i] > 0 || shape[i] == unknown_size,
"Each dimension of Attr(shape) must be positive or %d.",
unknown_size);
if (shape[i] == unknown_size) {
neg_dims_idx.push_back(i);
PADDLE_ENFORCE(neg_dims_idx.size() <= 1,
"Only one dimension of Attr(shape) can be unknown.");
}
}
int64_t capacity =
std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
int64_t in_size = framework::product(x_dims);
if (neg_dims_idx.size() == 1) {
// dim infer
shape[neg_dims_idx[0]] = in_size / (-capacity);
// recalculate capacity
capacity = shape[neg_dims_idx[0]] * (-capacity);
}
// capacity check
PADDLE_ENFORCE(capacity == in_size,
"The size of Input(X) mismatches with Attr(shape).");
// resize output
std::vector<int64_t> shape_int64(shape.size(), 0);
std::transform(shape.begin(), shape.end(), shape_int64.begin(),
...@@ -65,8 +77,7 @@ class ReshapeOp : public framework::OperatorWithKernel {
class ReshapeOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ReshapeOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input tensor of reshape operator.");
AddOutput("Out", "The output tensor of reshape operator.");
...@@ -88,6 +99,9 @@ the tensor X into a 2-D tensor:
[[1, 2, 3, 4]]
One dimension in the target shape can be set to -1, representing that its
size is unknown. In this case, the real dimension will be inferred from
the original shape of Input(X) and the other dimensions in the target shape.
)DOC"); )DOC");
} }
}; };
......
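As a quick standalone illustration of the -1 inference described above (a hypothetical helper, not the operator code): with an input of 24 elements and target shape {2, -1, 4}, the unknown dimension resolves to 3.
```c++
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Resolve at most one -1 entry in `shape` so that the product of all
// dimensions equals `in_size`, mirroring the InferShape logic above.
std::vector<int64_t> InferReshape(std::vector<int64_t> shape, int64_t in_size) {
  int64_t known = 1;
  int unknown = -1;
  for (std::size_t i = 0; i < shape.size(); ++i) {
    if (shape[i] == -1) {
      assert(unknown == -1 && "only one dimension may be -1");
      unknown = static_cast<int>(i);
    } else {
      known *= shape[i];
    }
  }
  if (unknown >= 0) shape[unknown] = in_size / known;  // dim infer
  return shape;  // InferReshape({2, -1, 4}, 24) -> {2, 3, 4}
}
```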
...@@ -63,8 +63,7 @@ class RmspropOp : public framework::OperatorWithKernel {
class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
public:
RmspropOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Param",
"(Tensor, default Tensor<float>) "
...
...@@ -57,15 +57,14 @@ class RNNMemoryHelperOpShapeInference : public framework::InferShapeBase {
class RNNMemoryHelperOpInfoMaker : public framework::OpProtoAndCheckerMaker {
public:
RNNMemoryHelperOpInfoMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "");
AddOutput("Out", "");
AddAttr<int>("dtype",
"(int, default 5 (FP32)) "
"Output data type")
.SetDefault(framework::proto::DataType::FP32);
AddComment("");
}
};
...@@ -114,8 +113,7 @@ class RNNMemoryHelperGradOp : public framework::OperatorBase {
class RNNMemoryHelperGradOpInfoMaker
: public framework::OpProtoAndCheckerMaker {
public:
RNNMemoryHelperGradOpInfoMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput(framework::GradVarName("Out"), "");
AddInput("X", "");
...@@ -124,7 +122,7 @@ class RNNMemoryHelperGradOpInfoMaker
AddAttr<int>("dtype",
"(int, default 5 (FP32)) "
"Output data type")
.SetDefault(framework::proto::DataType::FP32);
AddComment("");
}
};
...
...@@ -99,8 +99,7 @@ class ROIPoolGradOp : public framework::OperatorWithKernel {
class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ROIPoolOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(Tensor), "
...
...@@ -76,8 +76,7 @@ class RowConvGradOp : public framework::OperatorWithKernel {
class RowConvOpMaker : public framework::OpProtoAndCheckerMaker {
public:
RowConvOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(LoDTensor), the input(X) is a LodTensor, which supports "
...
...@@ -94,8 +94,7 @@ class SaveOp : public framework::OperatorBase {
class SaveOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
SaveOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor ) Input tensor to be saved");
AddComment(R"DOC(
...
...@@ -38,7 +38,7 @@ class ScaleOp : public framework::OperatorWithKernel {
template <typename AttrType>
class ScaleOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ScaleOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor) Input tensor of scale operator.");
AddOutput("Out", "(Tensor) Output tensor of scale operator.");
...
...@@ -78,8 +78,7 @@ class ScatterGradOp : public framework::OperatorWithKernel {
class ScatterOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ScatterOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Ref", "The source input of scatter op");
AddInput("Index",
...
...@@ -59,7 +59,7 @@ class SendOp : public framework::OperatorBase {
class SendOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SendOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor) Input tensor to be saved");
AddOutput("Out", "(Tensor) Output fetched from server");
...
...@@ -43,8 +43,7 @@ class SequenceConcatOp : public framework::OperatorWithKernel {
class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SequenceConcatOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(LodTensorArray) Input is a vector of LoDTensor, "
...
...@@ -100,8 +100,7 @@ class SequenceConvGradOp : public framework::OperatorWithKernel {
class SequenceConvOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SequenceConvOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput(
"X",
...
...@@ -12,14 +12,14 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/sequence_expand_op.h"
namespace paddle {
namespace operators {
using framework::Tensor;
class SequenceExpandOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -35,25 +35,24 @@ class SeqExpandOp : public framework::OperatorWithKernel {
}
};
class SequenceExpandOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SequenceExpandOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(Tensor or LoDTensor) The input(X) of this operator can be a "
"LoDTensor or a base Tensor.");
AddInput("Y",
"(LoDTensor)The reference input(Y) of sequence_expand op."
"It must be a LoDTensor with k-level(k>0)."
"The input(X) will be expanded according to LOD of input(Y)."
"The element numbers of last level in input(Y) "
"must be equal to dims[0] of input(X).");
AddOutput("Out",
"(LodTensor)The output of sequence_expand op."
"The lod of output will be as same as input(Y)'s lod.");
AddComment(R"DOC(
Sequence Expand Operator.
This operator expands input(X) according to LOD of input(Y).
Following are cases to better explain how this works:
...@@ -124,7 +123,7 @@ then we get 2-level LoDTensor
}
};
class SequenceExpandOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -146,11 +145,11 @@ class SeqExpandOpGrad : public framework::OperatorWithKernel {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(sequence_expand, ops::SequenceExpandOp, ops::SequenceExpandOpMaker,
sequence_expand_grad, ops::SequenceExpandOpGrad);
REGISTER_OP_CPU_KERNEL(
sequence_expand,
ops::SequenceExpandKernel<paddle::platform::CPUDeviceContext, float>);
REGISTER_OP_CPU_KERNEL(
sequence_expand_grad,
ops::SequenceExpandGradKernel<paddle::platform::CPUDeviceContext, float>);
...@@ -13,12 +13,12 @@
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/sequence_expand_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
sequence_expand,
ops::SequenceExpandKernel<paddle::platform::CUDADeviceContext, float>);
REGISTER_OP_CUDA_KERNEL(
sequence_expand_grad,
ops::SequenceExpandGradKernel<paddle::platform::CUDADeviceContext, float>);
...@@ -24,7 +24,7 @@ namespace operators {
using LoDTensor = framework::LoDTensor;
template <typename DeviceContext, typename T>
class SequenceExpandKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* x = context.Input<LoDTensor>("X");
...@@ -71,7 +71,7 @@ class SeqExpandKernel : public framework::OpKernel<T> {
*
* */
template <typename DeviceContext, typename T>
class SequenceExpandGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* d_out = context.Input<LoDTensor>(framework::GradVarName("Out"));
...
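To make the expansion rule concrete, here is a rough scalar sketch (an assumed helper, not the kernel itself): each element i of X is repeated `lod[i+1] - lod[i]` times, so the output takes on Y's LoD.
```c++
#include <cstddef>
#include <vector>

// Repeat element i of `x` once per position in Y's LoD segment
// [lod[i], lod[i + 1]), as sequence_expand does row-wise.
std::vector<float> ExpandByLod(const std::vector<float>& x,
                               const std::vector<std::size_t>& lod) {
  std::vector<float> out;
  for (std::size_t i = 0; i + 1 < lod.size(); ++i) {
    for (std::size_t j = lod[i]; j < lod[i + 1]; ++j) {
      out.push_back(x[i]);
    }
  }
  return out;  // ExpandByLod({1.f, 2.f}, {0, 2, 5}) -> {1, 1, 2, 2, 2}
}
```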
...@@ -37,8 +37,7 @@ class SequencePoolOp : public framework::OperatorWithKernel {
class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SequencePoolOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(LoDTensor) The variable-length input of SequencePoolOp");
AddOutput("Out",
...
...@@ -79,8 +79,7 @@ class SequenceSliceGradOp : public framework::OperatorWithKernel {
class SequenceSliceOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SequenceSliceOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(LoDTensor), "
...
...@@ -33,8 +33,7 @@ class SequenceSoftmaxOp : public framework::OperatorWithKernel {
class SequenceSoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SequenceSoftmaxOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(LoDTensor) 1-D or 2-D input LoDTensor with the 2-nd dimension "
...@@ -51,10 +50,14 @@ input Tensor can be either [N, 1] or [N], where N is the sum of the length
of all sequences.
The algorithm works as follows:
for i-th sequence in a mini-batch:
$$
Out(X[lod[i]:lod[i+1]], :) = \
\frac{\exp(X[lod[i]:lod[i+1], :])} \
{\sum(\exp(X[lod[i]:lod[i+1], :]))}
$$
For example, for a mini-batch of 3 sequences with variable-length,
each containing 2, 3, 2 time-steps, the lod of which is [0, 2, 5, 7],
...
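A scalar sketch of the equation above (assumed 1-D data and a numerically naive exp/sum, not the actual kernel): softmax is computed independently over each `[lod[i], lod[i+1])` slice.
```c++
#include <cmath>
#include <cstddef>
#include <vector>

// Apply softmax independently to every LoD segment [lod[i], lod[i + 1]).
void SequenceSoftmax(std::vector<float>* x,
                     const std::vector<std::size_t>& lod) {
  for (std::size_t i = 0; i + 1 < lod.size(); ++i) {
    float sum = 0.f;
    for (std::size_t j = lod[i]; j < lod[i + 1]; ++j) sum += std::exp((*x)[j]);
    for (std::size_t j = lod[i]; j < lod[i + 1]; ++j)
      (*x)[j] = std::exp((*x)[j]) / sum;
  }
}
```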
...@@ -43,7 +43,7 @@ class SGDOp : public framework::OperatorWithKernel {
class SGDOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SGDOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Param", "(Tensor) Input parameter");
AddInput("LearningRate", "(Tensor) Learning rate of SGD");
...
...@@ -54,8 +54,7 @@ class ShrinkRNNMemoryOp : public ArrayOp {
class ShrinkRNNMemoryOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
ShrinkRNNMemoryOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(LoDTensor) The RNN step memory to be shrinked.");
AddInput("RankTable", "(LoDRankTable) The lod_rank_table of dynamic RNN.");
...
...@@ -86,8 +86,8 @@ class SigmoidCrossEntropyWithLogitsGradOp
class SigmoidCrossEntropyWithLogitsOpMaker
: public framework::OpProtoAndCheckerMaker {
public:
SigmoidCrossEntropyWithLogitsOpMaker(OpProto* proto,
OpAttrChecker* op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(Tensor, default Tensor<float>), a 2-D tensor with shape N x D, "
...
...@@ -34,7 +34,7 @@ class SignOp : public framework::OperatorWithKernel {
template <typename AttrType>
class SignOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SignOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor) Input tensor of sign operator.");
AddOutput("Out", "(Tensor) Output tensor of sign operator.");
...
...@@ -47,8 +47,7 @@ class SmoothL1LossOp : public framework::OperatorWithKernel {
template <typename AttrType>
class SmoothL1LossOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SmoothL1LossOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(Tensor, default Tensor<float>) A tensor with rank at least 2. "
...
...@@ -36,8 +36,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SoftmaxOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"The input tensor of softmax. "
...
...@@ -20,8 +20,7 @@ namespace operators {
class SoftmaxWithCrossEntropyOpMaker
: public framework::OpProtoAndCheckerMaker {
public:
SoftmaxWithCrossEntropyOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Logits",
"(Tensor, default: Tensor<float>), The unscaled log probabilities "
...
...@@ -118,8 +118,7 @@ class SplitLoDTensorOp : public framework::OperatorBase {
class SplitLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
SplitLoDTensorOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input LoDTensor");
AddInput("Mask", "A bool column vector which mask the input");
...
...@@ -65,7 +65,7 @@ class SplitOp : public framework::OperatorWithKernel {
class SplitOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SplitOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor) Input tensor of the split operator.");
AddOutput("Out", "(Tensor) Output tensors of the split operator.")
...
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/spp_op.h"
namespace paddle {
namespace operators {
class SppOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SppOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput(
"X",
"(Tensor) The input tensor of spp operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the "
"number of channels, H and W is the height and width of feature.");
AddOutput("Out",
"(Tensor) The output tensor of spp operator."
"N * M."
"M = C * H * W");
AddAttr<int>("pyramid_height", "(int), multi level pooling");
AddAttr<std::string>(
"pooling_type",
"(string), pooling type, can be \"max\" for max-pooling "
"and \"avg\" for average-pooling.")
.InEnum({"max", "avg"});
AddComment(R"DOC(
"With spatial pyramid pooling, the input image can
be of any sizes. This not only allows arbitrary aspect
ratios, but also allows arbitrary scales. We can resize
the input image to any scale (e.g., min(w, h)=180, 224,
...) and apply the same deep network. When the
input image is at different scales, the network (with
the same filter sizes) will extract features at different
scales. The scales play important roles in traditional
methods.
Input shape: $(N, C_{in}, H_{in}, W_{in})$
Output shape: $(H_{out}, W_{out})$
Where
$$
H_{out} = N \\
W_{out} = (((4^pyramid_height) - 1) / (4 - 1))$ * C_{in}
$$
paper https://arxiv.org/pdf/1406.4729v4.pdf
)DOC");
}
};
class SppOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SppOp"
"should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of SppOp should not be null.");
auto in_x_dims = ctx->GetInputDim("X");
int pyramid_height = ctx->Attrs().Get<int>("pyramid_height");
PADDLE_ENFORCE(in_x_dims.size() == 4,
"Spping intput must be of 4-dimensional.");
int outlen = ((std::pow(4, pyramid_height) - 1) / (4 - 1)) * in_x_dims[1];
std::vector<int64_t> output_shape({in_x_dims[0], outlen});
ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
}
};
class SppOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
"Output(X@GRAD) should not be null.");
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(spp, ops::SppOp, ops::SppOpMaker, spp_grad, ops::SppOpGrad);
REGISTER_OP_CPU_KERNEL(
spp, ops::SppKernel<paddle::platform::CPUDeviceContext, float>,
ops::SppKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
spp_grad, ops::SppGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::SppGradKernel<paddle::platform::CPUDeviceContext, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/spp_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
spp, ops::SppKernel<paddle::platform::CUDADeviceContext, float>,
ops::SppKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
spp_grad, ops::SppGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::SppGradKernel<paddle::platform::CUDADeviceContext, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/pooling.h"
#include "paddle/operators/strided_memcpy.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class SppKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const framework::Tensor* in_x = context.Input<framework::Tensor>("X");
auto* out = context.Output<framework::Tensor>("Out");
int pyramid_height = context.template Attr<int>("pyramid_height");
std::string pooling_type =
context.template Attr<std::string>("pooling_type");
out->mutable_data<T>(context.GetPlace());
auto out_stride = framework::stride(out->dims());
int input_h = in_x->dims()[2];
int input_w = in_x->dims()[3];
size_t output_offset = 0;
for (int p = 0; p < pyramid_height; ++p) {
int bins = std::pow(2, p);
int kernel_size_h = std::ceil(input_h / static_cast<double>(bins));
int kernel_size_w = std::ceil(input_w / static_cast<double>(bins));
int padding_h = (kernel_size_h * bins - input_h + 1) / 2;
int padding_w = (kernel_size_w * bins - input_w + 1) / 2;
std::vector<int> kernel_size({kernel_size_h, kernel_size_w});
std::vector<int> strides({kernel_size_h, kernel_size_w});
std::vector<int> paddings({padding_h, padding_w});
// pooling output shape
framework::Tensor out_level;
std::vector<int64_t> output_shape_vec(
{in_x->dims()[0], in_x->dims()[1], bins, bins});
framework::DDim output_shape(framework::make_ddim(output_shape_vec));
out_level.mutable_data<T>(output_shape, context.GetPlace());
// pooling
if (pooling_type == "max") {
math::Pool2dFunctor<DeviceContext, math::MaxPool<T>, T> pool_forward;
math::MaxPool<T> max_process;
pool_forward(context.template device_context<DeviceContext>(), *in_x,
kernel_size, strides, paddings, max_process, &out_level);
} else if (pooling_type == "avg") {
math::Pool2dFunctor<DeviceContext, math::AvgPool<T>, T> pool_forward;
math::AvgPool<T> avg_process;
pool_forward(context.template device_context<DeviceContext>(), *in_x,
kernel_size, strides, paddings, avg_process, &out_level);
}
// flatten pooling output shape
int output_flatten_w = in_x->dims()[1] * bins * bins;
std::vector<int64_t> output_flatten_shape_vec(
{in_x->dims()[0], output_flatten_w});
framework::DDim output_flatten_shape(
framework::make_ddim(output_flatten_shape_vec));
out_level.Resize(output_flatten_shape);
// concat
auto out_level_stride = framework::stride(out_level.dims());
StridedMemcpy<T>(context.template device_context<DeviceContext>(),
out_level.data<T>(), out_level_stride, out_level.dims(),
out_stride, out->data<T>() + output_offset);
output_offset += out_level.dims()[1] * out_level_stride[1];
}
}
};
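To see the per-level arithmetic in `SppKernel::Compute` in isolation (a standalone sketch with an assumed 13x13 feature map): level p splits each spatial dimension into 2^p bins using a ceil-sized kernel, a stride equal to the kernel, and symmetric padding.
```c++
#include <cmath>
#include <cstdio>

// Reproduce the kernel/stride/padding computation from the loop above
// for one spatial dimension of size `in` at pyramid level `p`.
void LevelGeometry(int in, int p) {
  int bins = static_cast<int>(std::pow(2, p));
  int kernel = static_cast<int>(std::ceil(in / static_cast<double>(bins)));
  int padding = (kernel * bins - in + 1) / 2;
  std::printf("level %d: bins=%dx%d kernel=%d stride=%d padding=%d\n",
              p, bins, bins, kernel, kernel, padding);
}

int main() {
  for (int p = 0; p < 3; ++p) LevelGeometry(13, p);
  // level 0: 1x1 bins, kernel 13, padding 0
  // level 1: 2x2 bins, kernel 7, padding 1
  // level 2: 4x4 bins, kernel 4, padding 2
  return 0;
}
```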
template <typename DeviceContext, typename T>
class SppGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const framework::Tensor* in_x = context.Input<framework::Tensor>("X");
const framework::Tensor* out = context.Input<framework::Tensor>("Out");
const framework::Tensor* out_grad =
context.Input<framework::Tensor>(framework::GradVarName("Out"));
framework::Tensor* in_x_grad =
context.Output<framework::Tensor>(framework::GradVarName("X"));
int pyramid_height = context.template Attr<int>("pyramid_height");
std::string pooling_type =
context.template Attr<std::string>("pooling_type");
auto& device_ctx = context.template device_context<DeviceContext>();
math::SetConstant<DeviceContext, T> zero;
in_x_grad->mutable_data<T>(context.GetPlace());
zero(device_ctx, in_x_grad, static_cast<T>(0));
auto out_stride = framework::stride(out->dims());
int input_h = in_x->dims()[2];
int input_w = in_x->dims()[3];
size_t out_offset = 0;
for (int p = 0; p < pyramid_height; ++p) {
int bins = std::pow(2, p);
int kernel_size_h = std::ceil(input_h / static_cast<double>(bins));
int kernel_size_w = std::ceil(input_w / static_cast<double>(bins));
int padding_h = (kernel_size_h * bins - input_h + 1) / 2;
int padding_w = (kernel_size_w * bins - input_w + 1) / 2;
std::vector<int> kernel_size({kernel_size_h, kernel_size_w});
std::vector<int> strides({kernel_size_h, kernel_size_w});
std::vector<int> paddings({padding_h, padding_w});
// split out and outgrad ... to flatten
framework::Tensor out_level;
framework::Tensor outgrad_level;
int out_flatten_w = in_x->dims()[1] * bins * bins;
std::vector<int64_t> out_flatten_shape_vec(
{in_x->dims()[0], out_flatten_w});
framework::DDim out_flatten_shape(
framework::make_ddim(out_flatten_shape_vec));
out_level.mutable_data<T>(out_flatten_shape, context.GetPlace());
outgrad_level.mutable_data<T>(out_flatten_shape, context.GetPlace());
auto flatten_stride = framework::stride(out_level.dims());
// memcpy
StridedMemcpy<T>(context.template device_context<DeviceContext>(),
out->data<T>() + out_offset, out_stride,
out_level.dims(), flatten_stride, out_level.data<T>());
StridedMemcpy<T>(context.template device_context<DeviceContext>(),
out_grad->data<T>() + out_offset, out_stride,
outgrad_level.dims(), flatten_stride,
outgrad_level.data<T>());
out_offset += out_level.dims()[1] * out_stride[1];
// flatten backward to nchw
std::vector<int64_t> out_shape_vec({in_x->dims()[0], in_x->dims()[1]});
out_shape_vec.push_back(
(input_h - kernel_size_h + 2 * padding_h) / kernel_size_h + 1);
out_shape_vec.push_back(
(input_w - kernel_size_w + 2 * padding_w) / kernel_size_w + 1);
framework::DDim out_shape(framework::make_ddim(out_shape_vec));
// restore the NCHW view of the flattened level tensors
out_level.Resize(out_shape);
outgrad_level.Resize(out_shape);
// pooling backward
if (pooling_type == "max") {
math::MaxPool2dGradFunctor<DeviceContext, T> pool2d_backward;
pool2d_backward(context.template device_context<DeviceContext>(), *in_x,
*&out_level, *&outgrad_level, kernel_size, strides,
paddings, in_x_grad);
} else if (pooling_type == "avg") {
math::Pool2dGradFunctor<DeviceContext, math::AvgPoolGrad<T>, T>
pool_backward;
math::AvgPoolGrad<T> avg_process;
pool_backward(context.template device_context<DeviceContext>(), *in_x,
*&out_level, *&outgrad_level, kernel_size, strides,
paddings, avg_process, in_x_grad);
}
}
}
};
} // namespace operators
} // namespace paddle
...@@ -56,8 +56,7 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel { ...@@ -56,8 +56,7 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel {
class SquaredL2DistanceOpMaker : public framework::OpProtoAndCheckerMaker { class SquaredL2DistanceOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
SquaredL2DistanceOpMaker(framework::OpProto* proto, SquaredL2DistanceOpMaker(OpProto* proto, OpAttrChecker* op_checker)
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor) Input of SquaredL2DistanceOp."); AddInput("X", "(Tensor) Input of SquaredL2DistanceOp.");
AddInput("Y", "(Tensor) Target of SquaredL2DistanceOp."); AddInput("Y", "(Tensor) Target of SquaredL2DistanceOp.");
......
...@@ -48,8 +48,7 @@ class SquaredL2NormGradOp : public framework::OperatorWithKernel { ...@@ -48,8 +48,7 @@ class SquaredL2NormGradOp : public framework::OperatorWithKernel {
class SquaredL2NormOpMaker : public framework::OpProtoAndCheckerMaker { class SquaredL2NormOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
SquaredL2NormOpMaker(framework::OpProto* proto, SquaredL2NormOpMaker(OpProto* proto, OpAttrChecker* op_checker)
framework::OpAttrChecker* op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) { : framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor) The input of squared_l2_norm op."); AddInput("X", "(Tensor) The input of squared_l2_norm op.");
AddOutput("Out", "(Scalar) The output of squared_l2_norm op."); AddOutput("Out", "(Scalar) The output of squared_l2_norm op.");
......
...@@ -29,7 +29,7 @@ class SumOp : public framework::OperatorWithKernel { ...@@ -29,7 +29,7 @@ class SumOp : public framework::OperatorWithKernel {
"Output(Out) of SumOp should not be null."); "Output(Out) of SumOp should not be null.");
if (ctx->IsRuntime() && if (ctx->IsRuntime() &&
ctx->GetOutputsVarType("Out")[0] == ctx->GetOutputsVarType("Out")[0] ==
framework::VarDesc::LOD_TENSOR_ARRAY) { framework::proto::VarDesc::LOD_TENSOR_ARRAY) {
return; // skip runtime infershape when the output is a tensor array return; // skip runtime infershape when the output is a tensor array
} }
...@@ -72,8 +72,8 @@ class SumOp : public framework::OperatorWithKernel { ...@@ -72,8 +72,8 @@ class SumOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_NE(dtype, -1, PADDLE_ENFORCE_NE(dtype, -1,
"Sum operator should have at least one tensor"); "Sum operator should have at least one tensor");
return framework::OpKernelType(static_cast<framework::DataType>(dtype), return framework::OpKernelType(
ctx.device_context()); static_cast<framework::proto::DataType>(dtype), ctx.device_context());
} else if (x_vars[0]->IsType<framework::SelectedRows>()) { } else if (x_vars[0]->IsType<framework::SelectedRows>()) {
return framework::OpKernelType( return framework::OpKernelType(
framework::ToDataType( framework::ToDataType(
...@@ -98,7 +98,7 @@ class SumOp : public framework::OperatorWithKernel { ...@@ -98,7 +98,7 @@ class SumOp : public framework::OperatorWithKernel {
class SumOpMaker : public framework::OpProtoAndCheckerMaker { class SumOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
SumOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) SumOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(vector<Tensor>) The input tensors of sum operator.") AddInput("X", "(vector<Tensor>) The input tensors of sum operator.")
.AsDuplicable(); .AsDuplicable();
...@@ -118,7 +118,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference { ...@@ -118,7 +118,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
void operator()(const framework::OpDescBind& op_desc, void operator()(const framework::OpDescBind& op_desc,
framework::BlockDescBind* block) const override { framework::BlockDescBind* block) const override {
auto& inputs = op_desc.Input("X"); auto& inputs = op_desc.Input("X");
auto var_type = framework::VarDesc::SELECTED_ROWS; auto var_type = framework::proto::VarDesc::SELECTED_ROWS;
for (auto& name : op_desc.Input("X")) { for (auto& name : op_desc.Input("X")) {
VLOG(10) << name << " " VLOG(10) << name << " "
...@@ -128,12 +128,12 @@ class SumOpVarTypeInference : public framework::VarTypeInference { ...@@ -128,12 +128,12 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
bool any_input_is_lod_tensor = std::any_of( bool any_input_is_lod_tensor = std::any_of(
inputs.begin(), inputs.end(), [block](const std::string& name) { inputs.begin(), inputs.end(), [block](const std::string& name) {
return block->FindRecursiveOrCreateVar(name)->GetType() == return block->FindRecursiveOrCreateVar(name)->GetType() ==
framework::VarDesc::LOD_TENSOR; framework::proto::VarDesc::LOD_TENSOR;
}); });
auto is_tensor_array = [block](const std::string& name) { auto is_tensor_array = [block](const std::string& name) {
return detail::Ref(block->FindRecursiveOrCreateVar(name)).GetType() == return detail::Ref(block->FindRecursiveOrCreateVar(name)).GetType() ==
framework::VarDesc::LOD_TENSOR_ARRAY; framework::proto::VarDesc::LOD_TENSOR_ARRAY;
}; };
bool any_input_is_tensor_array = bool any_input_is_tensor_array =
...@@ -152,9 +152,9 @@ class SumOpVarTypeInference : public framework::VarTypeInference { ...@@ -152,9 +152,9 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
PADDLE_ENFORCE(all_inputs_are_tensor_array, PADDLE_ENFORCE(all_inputs_are_tensor_array,
"Not all inputs are tensor array:\n%s", os.str()); "Not all inputs are tensor array:\n%s", os.str());
} }
var_type = framework::VarDesc::LOD_TENSOR_ARRAY; var_type = framework::proto::VarDesc::LOD_TENSOR_ARRAY;
} else if (any_input_is_lod_tensor) { } else if (any_input_is_lod_tensor) {
var_type = framework::VarDesc::LOD_TENSOR; var_type = framework::proto::VarDesc::LOD_TENSOR;
} }
auto out_var_name = op_desc.Output("Out").front(); auto out_var_name = op_desc.Output("Out").front();
......
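In prose, the var-type rule above: if any input is a LoDTensorArray then all inputs must be, and the output becomes a LoDTensorArray; otherwise any LoDTensor input promotes the output to LoDTensor; only when every input is SelectedRows does the output stay SelectedRows. An editorial Python sketch of that decision (names ours, not Paddle API):

# Editorial sketch of SumOpVarTypeInference's rule.
LOD_TENSOR = 'lod_tensor'
LOD_TENSOR_ARRAY = 'lod_tensor_array'
SELECTED_ROWS = 'selected_rows'

def infer_sum_output_type(input_types):
    if any(t == LOD_TENSOR_ARRAY for t in input_types):
        # tensor arrays may not be mixed with other input kinds
        assert all(t == LOD_TENSOR_ARRAY for t in input_types)
        return LOD_TENSOR_ARRAY
    if any(t == LOD_TENSOR for t in input_types):
        return LOD_TENSOR
    return SELECTED_ROWS

print(infer_sum_output_type([SELECTED_ROWS, LOD_TENSOR]))  # lod_tensor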
...@@ -51,8 +51,7 @@ class WriteToArrayOp : public ArrayOp { ...@@ -51,8 +51,7 @@ class WriteToArrayOp : public ArrayOp {
class WriteToArrayOpProtoMaker : public framework::OpProtoAndCheckerMaker { class WriteToArrayOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public: public:
WriteToArrayOpProtoMaker(framework::OpProto *proto, WriteToArrayOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(LoDTensor) the tensor will be written to tensor array"); AddInput("X", "(LoDTensor) the tensor will be written to tensor array");
AddInput( AddInput(
...@@ -104,7 +103,7 @@ class WriteToArrayInferVarType : public framework::VarTypeInference { ...@@ -104,7 +103,7 @@ class WriteToArrayInferVarType : public framework::VarTypeInference {
VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY"; VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY";
auto &out = detail::Ref(block->FindRecursiveOrCreateVar(out_name), auto &out = detail::Ref(block->FindRecursiveOrCreateVar(out_name),
"Cannot found %s", out_name); "Cannot found %s", out_name);
out.SetType(framework::VarDesc::LOD_TENSOR_ARRAY); out.SetType(framework::proto::VarDesc::LOD_TENSOR_ARRAY);
auto *x = block->FindVarRecursive(x_name); auto *x = block->FindVarRecursive(x_name);
if (x != nullptr) { if (x != nullptr) {
out.SetDataType(x->GetDataType()); out.SetDataType(x->GetDataType());
...@@ -140,8 +139,7 @@ class ReadFromArrayOp : public ArrayOp { ...@@ -140,8 +139,7 @@ class ReadFromArrayOp : public ArrayOp {
class ReadFromArrayProtoMaker : public framework::OpProtoAndCheckerMaker { class ReadFromArrayProtoMaker : public framework::OpProtoAndCheckerMaker {
public: public:
ReadFromArrayProtoMaker(framework::OpProto *proto, ReadFromArrayProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(TensorArray) the array will be read from."); AddInput("X", "(TensorArray) the array will be read from.");
AddInput("I", AddInput("I",
......
...@@ -46,7 +46,7 @@ class TopkOp : public framework::OperatorWithKernel { ...@@ -46,7 +46,7 @@ class TopkOp : public framework::OperatorWithKernel {
class TopkOpMaker : public framework::OpProtoAndCheckerMaker { class TopkOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
TopkOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) TopkOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor) The input of Topk op"); AddInput("X", "(Tensor) The input of Topk op");
AddOutput("Out", "(Tensor) The output tensor of Topk op"); AddOutput("Out", "(Tensor) The output tensor of Topk op");
......
...@@ -55,8 +55,7 @@ class TransposeOp : public framework::OperatorWithKernel { ...@@ -55,8 +55,7 @@ class TransposeOp : public framework::OperatorWithKernel {
class TransposeOpMaker : public framework::OpProtoAndCheckerMaker { class TransposeOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
TransposeOpMaker(framework::OpProto* proto, TransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker)
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput( AddInput(
"X", "X",
......
...@@ -66,15 +66,14 @@ class UniformRandomOp : public framework::OperatorWithKernel { ...@@ -66,15 +66,14 @@ class UniformRandomOp : public framework::OperatorWithKernel {
framework::OpKernelType GetKernelType( framework::OpKernelType GetKernelType(
const framework::ExecutionContext& ctx) const override { const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType( return framework::OpKernelType(
static_cast<framework::DataType>(ctx.Attr<int>("dtype")), static_cast<framework::proto::DataType>(ctx.Attr<int>("dtype")),
ctx.GetPlace()); ctx.GetPlace());
} }
}; };
class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker { class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
UniformRandomOpMaker(framework::OpProto* proto, UniformRandomOpMaker(OpProto* proto, OpAttrChecker* op_checker)
framework::OpAttrChecker* op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) { : framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddOutput("Out", "(Tensor) The output tensor of uniform random op"); AddOutput("Out", "(Tensor) The output tensor of uniform random op");
AddComment(R"DOC( AddComment(R"DOC(
...@@ -100,7 +99,7 @@ uniform distribution. ...@@ -100,7 +99,7 @@ uniform distribution.
"0 means use a seed generated by the system.") "0 means use a seed generated by the system.")
.SetDefault(0); .SetDefault(0);
AddAttr<int>("dtype", "(int, default 5(FP32)) Output tensor data type") AddAttr<int>("dtype", "(int, default 5(FP32)) Output tensor data type")
.SetDefault(framework::DataType::FP32); .SetDefault(framework::proto::DataType::FP32);
} }
}; };
} // namespace operators } // namespace operators
......
...@@ -18,8 +18,7 @@ namespace operators { ...@@ -18,8 +18,7 @@ namespace operators {
class Unpool2dOpMaker : public framework::OpProtoAndCheckerMaker { class Unpool2dOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
Unpool2dOpMaker(framework::OpProto* proto, Unpool2dOpMaker(OpProto* proto, OpAttrChecker* op_checker)
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput( AddInput(
"X", "X",
......
...@@ -64,7 +64,7 @@ class WhileOp : public framework::OperatorBase { ...@@ -64,7 +64,7 @@ class WhileOp : public framework::OperatorBase {
class WhileOpMaker : public framework::OpProtoAndCheckerMaker { class WhileOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
WhileOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) WhileOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput(kParameters, AddInput(kParameters,
"A set of variables, which are required by operators inside the " "A set of variables, which are required by operators inside the "
...@@ -321,10 +321,10 @@ class WhileGradOpShapeInference : public framework::InferShapeBase { ...@@ -321,10 +321,10 @@ class WhileGradOpShapeInference : public framework::InferShapeBase {
continue; continue;
} }
auto dims = ctx->GetInputsElementDim(kParameters, i); auto dims = ctx->GetInputsElementDim(kParameters, i);
if (var_types[i] == framework::VarDesc::LOD_TENSOR) { if (var_types[i] == framework::proto::VarDesc::LOD_TENSOR) {
names_to_set.push_back(pg_names[i]); names_to_set.push_back(pg_names[i]);
dims_to_set.push_back(dims); dims_to_set.push_back(dims);
} else if (var_types[i] == framework::VarDesc::LOD_TENSOR_ARRAY) { } else if (var_types[i] == framework::proto::VarDesc::LOD_TENSOR_ARRAY) {
// not sure how to set the dim of LOD_TENSOR_ARRAY // not sure how to set the dim of LOD_TENSOR_ARRAY
names_to_set.push_back(pg_names[i]); names_to_set.push_back(pg_names[i]);
dims_to_set.push_back(dims); dims_to_set.push_back(dims);
......
...@@ -19,7 +19,7 @@ CPUDeviceContext::CPUDeviceContext() { ...@@ -19,7 +19,7 @@ CPUDeviceContext::CPUDeviceContext() {
eigen_device_.reset(new Eigen::DefaultDevice()); eigen_device_.reset(new Eigen::DefaultDevice());
} }
CPUDeviceContext::CPUDeviceContext(CPUPlace place) { CPUDeviceContext::CPUDeviceContext(CPUPlace place) : place_(place) {
eigen_device_.reset(new Eigen::DefaultDevice()); eigen_device_.reset(new Eigen::DefaultDevice());
} }
...@@ -27,7 +27,7 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const { ...@@ -27,7 +27,7 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
return eigen_device_.get(); return eigen_device_.get();
} }
Place CPUDeviceContext::GetPlace() const { return CPUPlace(); } Place CPUDeviceContext::GetPlace() const { return place_; }
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
...@@ -125,21 +125,21 @@ cudnnHandle_t CUDADeviceContext::cudnn_handle() const { return cudnn_handle_; } ...@@ -125,21 +125,21 @@ cudnnHandle_t CUDADeviceContext::cudnn_handle() const { return cudnn_handle_; }
cudaStream_t CUDADeviceContext::stream() const { return stream_; } cudaStream_t CUDADeviceContext::stream() const { return stream_; }
CudnnDeviceContext::CudnnDeviceContext(CudnnPlace place) CUDNNDeviceContext::CUDNNDeviceContext(CUDNNPlace place)
: CUDADeviceContext(place), place_(place) { : CUDADeviceContext(place), place_(place) {
PADDLE_ENFORCE(dynload::cudnnCreate(&cudnn_handle_)); PADDLE_ENFORCE(dynload::cudnnCreate(&cudnn_handle_));
PADDLE_ENFORCE(dynload::cudnnSetStream(cudnn_handle_, stream())); PADDLE_ENFORCE(dynload::cudnnSetStream(cudnn_handle_, stream()));
} }
CudnnDeviceContext::~CudnnDeviceContext() { CUDNNDeviceContext::~CUDNNDeviceContext() {
SetDeviceId(place_.device); SetDeviceId(place_.device);
Wait(); Wait();
PADDLE_ENFORCE(dynload::cudnnDestroy(cudnn_handle_)); PADDLE_ENFORCE(dynload::cudnnDestroy(cudnn_handle_));
} }
Place CudnnDeviceContext::GetPlace() const { return CudnnPlace(); } Place CUDNNDeviceContext::GetPlace() const { return CUDNNPlace(); }
cudnnHandle_t CudnnDeviceContext::cudnn_handle() const { return cudnn_handle_; } cudnnHandle_t CUDNNDeviceContext::cudnn_handle() const { return cudnn_handle_; }
#endif #endif
......
...@@ -45,6 +45,7 @@ class CPUDeviceContext : public DeviceContext { ...@@ -45,6 +45,7 @@ class CPUDeviceContext : public DeviceContext {
Place GetPlace() const override; Place GetPlace() const override;
private: private:
CPUPlace place_;
std::unique_ptr<Eigen::DefaultDevice> eigen_device_; std::unique_ptr<Eigen::DefaultDevice> eigen_device_;
}; };
...@@ -86,10 +87,10 @@ class CUDADeviceContext : public DeviceContext { ...@@ -86,10 +87,10 @@ class CUDADeviceContext : public DeviceContext {
cublasHandle_t cublas_handle_; cublasHandle_t cublas_handle_;
}; };
class CudnnDeviceContext : public CUDADeviceContext { class CUDNNDeviceContext : public CUDADeviceContext {
public: public:
explicit CudnnDeviceContext(CudnnPlace place); explicit CUDNNDeviceContext(CUDNNPlace place);
virtual ~CudnnDeviceContext(); virtual ~CUDNNDeviceContext();
/*! \brief Return place in the device context. */ /*! \brief Return place in the device context. */
Place GetPlace() const final; Place GetPlace() const final;
...@@ -99,7 +100,7 @@ class CudnnDeviceContext : public CUDADeviceContext { ...@@ -99,7 +100,7 @@ class CudnnDeviceContext : public CUDADeviceContext {
private: private:
cudnnHandle_t cudnn_handle_; cudnnHandle_t cudnn_handle_;
CudnnPlace place_; CUDNNPlace place_;
}; };
#endif #endif
......
...@@ -47,14 +47,14 @@ TEST(Device, CUDADeviceContext) { ...@@ -47,14 +47,14 @@ TEST(Device, CUDADeviceContext) {
} }
} }
TEST(Device, CudnnDeviceContext) { TEST(Device, CUDNNDeviceContext) {
using paddle::platform::CudnnDeviceContext; using paddle::platform::CUDNNDeviceContext;
using paddle::platform::CudnnPlace; using paddle::platform::CUDNNPlace;
if (paddle::platform::dynload::HasCUDNN()) { if (paddle::platform::dynload::HasCUDNN()) {
int count = paddle::platform::GetCUDADeviceCount(); int count = paddle::platform::GetCUDADeviceCount();
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
CudnnDeviceContext* device_context = CUDNNDeviceContext* device_context =
new CudnnDeviceContext(CudnnPlace(i)); new CUDNNDeviceContext(CUDNNPlace(i));
cudnnHandle_t cudnn_handle = device_context->cudnn_handle(); cudnnHandle_t cudnn_handle = device_context->cudnn_handle();
ASSERT_NE(nullptr, cudnn_handle); ASSERT_NE(nullptr, cudnn_handle);
ASSERT_NE(nullptr, device_context->stream()); ASSERT_NE(nullptr, device_context->stream());
......
...@@ -25,6 +25,11 @@ void *nccl_dso_handle; ...@@ -25,6 +25,11 @@ void *nccl_dso_handle;
NCCL_RAND_ROUTINE_EACH(DEFINE_WRAP); NCCL_RAND_ROUTINE_EACH(DEFINE_WRAP);
void LoadNCCLDSO() {
platform::call_once(nccl_dso_flag,
[] { GetNCCLDsoHandle(&nccl_dso_handle); });
}
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -28,18 +28,18 @@ extern std::once_flag nccl_dso_flag; ...@@ -28,18 +28,18 @@ extern std::once_flag nccl_dso_flag;
extern void* nccl_dso_handle; extern void* nccl_dso_handle;
#ifdef PADDLE_USE_DSO #ifdef PADDLE_USE_DSO
#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \ extern void LoadNCCLDSO();
struct DynLoad__##__name { \
template <typename... Args> \ #define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
auto operator()(Args... args) -> decltype(__name(args...)) { \ struct DynLoad__##__name { \
using nccl_func = decltype(__name(args...)) (*)(Args...); \ template <typename... Args> \
platform::call_once(nccl_dso_flag, \ auto operator()(Args... args) -> decltype(__name(args...)) { \
paddle::platform::dynload::GetNCCLDsoHandle, \ using nccl_func = decltype(__name(args...)) (*)(Args...); \
&nccl_dso_handle); \ paddle::platform::dynload::LoadNCCLDSO(); \
void* p_##__name = dlsym(nccl_dso_handle, #__name); \ void* p_##__name = dlsym(nccl_dso_handle, #__name); \
return reinterpret_cast<nccl_func>(p_##__name)(args...); \ return reinterpret_cast<nccl_func>(p_##__name)(args...); \
} \ } \
}; \ }; \
extern DynLoad__##__name __name extern DynLoad__##__name __name
#else #else
#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \ #define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
......
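The new-side macro routes every wrapper through a single shared LoadNCCLDSO() helper instead of repeating the call_once inline. An editorial Python sketch of the same lazy, load-once pattern (library name assumed):

# Editorial sketch, not Paddle code: open libnccl once, cache the handle,
# then resolve each wrapped symbol on call, as the DynLoad__ structs do.
import ctypes
import threading

_nccl_once = threading.Lock()
_nccl_handle = None

def load_nccl_dso():
    global _nccl_handle
    with _nccl_once:  # stands in for platform::call_once
        if _nccl_handle is None:
            _nccl_handle = ctypes.CDLL('libnccl.so')  # assumed soname
    return _nccl_handle

def nccl_call(symbol, *args):
    return getattr(load_nccl_dso(), symbol)(*args)  # dlsym-and-call analogue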
...@@ -73,19 +73,20 @@ size_t GpuMaxChunkSize() { ...@@ -73,19 +73,20 @@ size_t GpuMaxChunkSize() {
size_t available = 0; size_t available = 0;
GpuMemoryUsage(available, total); GpuMemoryUsage(available, total);
VLOG(10) << "GPU Usage " << available / 1024 / 1024 << "M/"
// Reserving the rest memory for page tables, etc. << total / 1024 / 1024 << "M";
size_t reserving = 0.05 * total; size_t reserving = static_cast<size_t>(0.05 * total);
// If available less than minimum chunk size, no usable memory exists. // If available less than minimum chunk size, no usable memory exists.
available = available =
std::max(std::max(available, GpuMinChunkSize()) - GpuMinChunkSize(), std::min(std::max(available, GpuMinChunkSize()) - GpuMinChunkSize(),
reserving) - total - reserving);
reserving;
// Reserving the rest memory for page tables, etc.
size_t allocating = FLAGS_fraction_of_gpu_memory_to_use * total; size_t allocating = static_cast<size_t>(FLAGS_fraction_of_gpu_memory_to_use *
(total - reserving));
PADDLE_ENFORCE_LT(allocating, available); PADDLE_ENFORCE_LE(allocating, available);
return allocating; return allocating;
} }
......
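The new-side arithmetic both caps the usable amount by what is actually free and sizes the allocation against (total - reserving); an editorial Python sketch with made-up numbers:

# Editorial sketch of the revised GpuMaxChunkSize arithmetic; values are ours.
def gpu_max_chunk_size(total, available, min_chunk, fraction):
    reserving = int(0.05 * total)  # held back for page tables, etc.
    usable = min(max(available, min_chunk) - min_chunk, total - reserving)
    allocating = int(fraction * (total - reserving))
    assert allocating <= usable  # mirrors PADDLE_ENFORCE_LE
    return allocating

GiB = 1 << 30
# 8 GiB card, 6 GiB free, 1 MiB minimum chunk, use 50% of the non-reserved pool:
print(gpu_max_chunk_size(8 * GiB, 6 * GiB, 1 << 20, 0.5))  # ~3.8 GiB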
...@@ -31,7 +31,7 @@ namespace platform { ...@@ -31,7 +31,7 @@ namespace platform {
TEST(NCCL, init) { TEST(NCCL, init) {
std::vector<ncclComm_t> comms; std::vector<ncclComm_t> comms;
comms.resize(dev_count); comms.resize(dev_count);
PADDLE_ENFORCE(dynload::ncclCommInitAll(comms.data(), dev_count, nullptr)); dynload::ncclCommInitAll(comms.data(), dev_count, nullptr);
for (int i = 0; i < dev_count; ++i) { for (int i = 0; i < dev_count; ++i) {
dynload::ncclCommDestroy(comms[i]); dynload::ncclCommDestroy(comms[i]);
} }
...@@ -62,7 +62,7 @@ TEST(NCCL, all_reduce) { ...@@ -62,7 +62,7 @@ TEST(NCCL, all_reduce) {
std::vector<ncclComm_t> comms; std::vector<ncclComm_t> comms;
comms.resize(dev_count); comms.resize(dev_count);
VLOG(1) << "Initializing ncclComm"; VLOG(1) << "Initializing ncclComm";
PADDLE_ENFORCE(dynload::ncclCommInitAll(comms.data(), dev_count, nullptr)); dynload::ncclCommInitAll(comms.data(), dev_count, nullptr);
VLOG(1) << "ncclComm initialized"; VLOG(1) << "ncclComm initialized";
VLOG(1) << "Creating thread data"; VLOG(1) << "Creating thread data";
std::vector<std::unique_ptr<PerThreadData<double>>> data; std::vector<std::unique_ptr<PerThreadData<double>>> data;
......
...@@ -23,6 +23,7 @@ class PlacePrinter : public boost::static_visitor<> { ...@@ -23,6 +23,7 @@ class PlacePrinter : public boost::static_visitor<> {
public: public:
explicit PlacePrinter(std::ostream &os) : os_(os) {} explicit PlacePrinter(std::ostream &os) : os_(os) {}
void operator()(const CPUPlace &) { os_ << "CPUPlace"; } void operator()(const CPUPlace &) { os_ << "CPUPlace"; }
void operator()(const MKLDNNPlace &) { os_ << "MKLDNNPlace"; }
void operator()(const GPUPlace &p) { os_ << "GPUPlace(" << p.device << ")"; } void operator()(const GPUPlace &p) { os_ << "GPUPlace(" << p.device << ")"; }
private: private:
...@@ -38,12 +39,17 @@ const Place &get_place() { return the_default_place; } ...@@ -38,12 +39,17 @@ const Place &get_place() { return the_default_place; }
const GPUPlace default_gpu() { return GPUPlace(0); } const GPUPlace default_gpu() { return GPUPlace(0); }
const CPUPlace default_cpu() { return CPUPlace(); } const CPUPlace default_cpu() { return CPUPlace(); }
const MKLDNNPlace default_mkldnn() { return MKLDNNPlace(); }
bool is_gpu_place(const Place &p) { bool is_gpu_place(const Place &p) {
return boost::apply_visitor(IsGPUPlace(), p); return boost::apply_visitor(IsGPUPlace(), p);
} }
bool is_cpu_place(const Place &p) { bool is_cpu_place(const Place &p) {
return !boost::apply_visitor(IsGPUPlace(), p); return !is_gpu_place(p) && !is_mkldnn_place(p);
}
bool is_mkldnn_place(const Place &p) {
return boost::apply_visitor(IsMKLDNNPlace(), p);
} }
bool places_are_same_class(const Place &p1, const Place &p2) { bool places_are_same_class(const Place &p1, const Place &p2) {
......
...@@ -31,6 +31,14 @@ struct CPUPlace { ...@@ -31,6 +31,14 @@ struct CPUPlace {
inline bool operator!=(const CPUPlace &) const { return false; } inline bool operator!=(const CPUPlace &) const { return false; }
}; };
struct MKLDNNPlace {
MKLDNNPlace() {}
// needed for variant equality comparison
inline bool operator==(const MKLDNNPlace &) const { return true; }
inline bool operator!=(const MKLDNNPlace &) const { return false; }
};
struct GPUPlace { struct GPUPlace {
GPUPlace() : GPUPlace(0) {} GPUPlace() : GPUPlace(0) {}
explicit GPUPlace(int d) : device(d) {} explicit GPUPlace(int d) : device(d) {}
...@@ -43,21 +51,28 @@ struct GPUPlace { ...@@ -43,21 +51,28 @@ struct GPUPlace {
int device; int device;
}; };
struct CudnnPlace : public GPUPlace { struct CUDNNPlace : public GPUPlace {
CudnnPlace() : GPUPlace() {} CUDNNPlace() : GPUPlace() {}
explicit CudnnPlace(int d) : GPUPlace(d) {} explicit CUDNNPlace(int d) : GPUPlace(d) {}
}; };
struct IsGPUPlace : public boost::static_visitor<bool> { struct IsGPUPlace : public boost::static_visitor<bool> {
bool operator()(const CPUPlace &) const { return false; } bool operator()(const CPUPlace &) const { return false; }
bool operator()(const MKLDNNPlace &) const { return false; }
bool operator()(const GPUPlace &gpu) const { return true; } bool operator()(const GPUPlace &gpu) const { return true; }
}; };
struct IsMKLDNNPlace : public boost::static_visitor<bool> {
bool operator()(const MKLDNNPlace &) const { return true; }
bool operator()(const CPUPlace &) const { return false; }
bool operator()(const GPUPlace &) const { return false; }
};
// Define the max number of Place in bit length. i.e., the max number of places // Define the max number of Place in bit length. i.e., the max number of places
// should be less equal than 2^(NUM_PLACE_TYPE_LIMIT_IN_BIT) // should be less equal than 2^(NUM_PLACE_TYPE_LIMIT_IN_BIT)
#define NUM_PLACE_TYPE_LIMIT_IN_BIT 4 #define NUM_PLACE_TYPE_LIMIT_IN_BIT 4
typedef boost::variant<CudnnPlace, GPUPlace, CPUPlace> Place; typedef boost::variant<CUDNNPlace, GPUPlace, CPUPlace, MKLDNNPlace> Place;
// static check number of place types is less equal than // static check number of place types is less equal than
// 2^(NUM_PLACE_TYPE_LIMIT_IN_BIT) // 2^(NUM_PLACE_TYPE_LIMIT_IN_BIT)
...@@ -70,9 +85,11 @@ const Place &get_place(); ...@@ -70,9 +85,11 @@ const Place &get_place();
const GPUPlace default_gpu(); const GPUPlace default_gpu();
const CPUPlace default_cpu(); const CPUPlace default_cpu();
const MKLDNNPlace default_mkldnn();
bool is_gpu_place(const Place &); bool is_gpu_place(const Place &);
bool is_cpu_place(const Place &); bool is_cpu_place(const Place &);
bool is_mkldnn_place(const Place &);
bool places_are_same_class(const Place &, const Place &); bool places_are_same_class(const Place &, const Place &);
std::ostream &operator<<(std::ostream &, const Place &); std::ostream &operator<<(std::ostream &, const Place &);
......
...@@ -5,25 +5,37 @@ ...@@ -5,25 +5,37 @@
TEST(Place, Equality) { TEST(Place, Equality) {
paddle::platform::CPUPlace cpu; paddle::platform::CPUPlace cpu;
paddle::platform::GPUPlace g0(0), g1(1), gg0(0); paddle::platform::GPUPlace g0(0), g1(1), gg0(0);
paddle::platform::CUDNNPlace d0(0), d1(1), dd0(0);
EXPECT_EQ(cpu, cpu); EXPECT_EQ(cpu, cpu);
EXPECT_EQ(g0, g0); EXPECT_EQ(g0, g0);
EXPECT_EQ(g1, g1); EXPECT_EQ(g1, g1);
EXPECT_EQ(g0, gg0); EXPECT_EQ(g0, gg0);
EXPECT_EQ(d0, dd0);
EXPECT_NE(g0, g1); EXPECT_NE(g0, g1);
EXPECT_NE(d0, d1);
EXPECT_TRUE(paddle::platform::places_are_same_class(g0, gg0)); EXPECT_TRUE(paddle::platform::places_are_same_class(g0, gg0));
EXPECT_FALSE(paddle::platform::places_are_same_class(g0, cpu)); EXPECT_FALSE(paddle::platform::places_are_same_class(g0, cpu));
EXPECT_TRUE(paddle::platform::is_gpu_place(d0));
EXPECT_FALSE(paddle::platform::places_are_same_class(g0, d0));
} }
TEST(Place, Default) { TEST(Place, Default) {
EXPECT_TRUE(paddle::platform::is_gpu_place(paddle::platform::get_place())); EXPECT_TRUE(paddle::platform::is_gpu_place(paddle::platform::get_place()));
EXPECT_TRUE(paddle::platform::is_gpu_place(paddle::platform::default_gpu())); EXPECT_TRUE(paddle::platform::is_gpu_place(paddle::platform::default_gpu()));
EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::default_cpu())); EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::default_cpu()));
EXPECT_TRUE(
paddle::platform::is_mkldnn_place(paddle::platform::default_mkldnn()));
paddle::platform::set_place(paddle::platform::CPUPlace()); paddle::platform::set_place(paddle::platform::CPUPlace());
EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::get_place())); EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::get_place()));
paddle::platform::set_place(paddle::platform::MKLDNNPlace());
EXPECT_FALSE(paddle::platform::is_cpu_place(paddle::platform::get_place()));
EXPECT_TRUE(paddle::platform::is_mkldnn_place(paddle::platform::get_place()));
} }
TEST(Place, Print) { TEST(Place, Print) {
......
...@@ -14,6 +14,19 @@ ...@@ -14,6 +14,19 @@
#pragma once #pragma once
#ifdef __CUDACC__
#ifdef __CUDACC_VER_MAJOR__
// CUDA 9 defines `__CUDACC_VER__` as a warning message; manually define
// __CUDACC_VER__ from the versioned macros instead.
#undef __CUDACC_VER__
#define __CUDACC_VER__ \
(__CUDACC_VER_MAJOR__ * 10000 + __CUDACC_VER_MINOR__ * 100 + \
__CUDACC_VER_BUILD__)
#endif
#endif
#include <boost/config.hpp> #include <boost/config.hpp>
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
......
if(WITH_PYTHON) if(WITH_PYTHON)
cc_library(paddle_pybind SHARED cc_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc SRCS pybind.cc exception.cc protobuf.cc const_value.cc
DEPS pybind python backward proto_desc paddle_memory executor prune DEPS pybind python backward proto_desc paddle_memory executor prune init
${GLOB_OP_LIB}) ${GLOB_OP_LIB})
endif(WITH_PYTHON) endif(WITH_PYTHON)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "const_value.h"
#include "paddle/framework/operator.h"
namespace paddle {
namespace pybind {
void BindConstValue(pybind11::module& m) {
m.def("kEmptyVarName", [] { return framework::kEmptyVarName; });
m.def("kTempVarName", [] { return framework::kTempVarName; });
m.def("kGradVarSuffix", [] { return framework::kGradVarSuffix; });
m.def("kZeroVarSuffix", [] { return framework::kZeroVarSuffix; });
}
} // namespace pybind
} // namespace paddle
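With these bindings compiled into the extension module, the constants should be reachable from Python roughly as below (an editorial sketch; the exact module path is assumed from the build rules above):

# Editorial sketch; `paddle.v2.fluid.core` is the assumed extension module.
from paddle.v2.fluid import core

print(core.kEmptyVarName())   # sentinel name for an absent variable
print(core.kTempVarName())    # prefix used for temporary variables
print(core.kGradVarSuffix())  # suffix appended to gradient variable names
print(core.kZeroVarSuffix())  # suffix for zero-filled gradient variables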
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <Python.h>
#include "paddle/platform/enforce.h"
#include "pybind11/pybind11.h"
namespace py = pybind11;
namespace paddle {
namespace pybind {
extern void BindConstValue(pybind11::module& m);
} // namespace pybind
} // namespace paddle
...@@ -31,31 +31,32 @@ std::string Escape(const std::string& s) { ...@@ -31,31 +31,32 @@ std::string Escape(const std::string& s) {
return r; return r;
} }
std::string AttrType(paddle::framework::AttrType at) { std::string AttrType(paddle::framework::proto::AttrType at) {
switch (at) { switch (at) {
case paddle::framework::INT: case paddle::framework::proto::INT:
return "int"; return "int";
case paddle::framework::FLOAT: case paddle::framework::proto::FLOAT:
return "float"; return "float";
case paddle::framework::STRING: case paddle::framework::proto::STRING:
return "string"; return "string";
case paddle::framework::BOOLEAN: case paddle::framework::proto::BOOLEAN:
return "bool"; return "bool";
case paddle::framework::INTS: case paddle::framework::proto::INTS:
return "int array"; return "int array";
case paddle::framework::FLOATS: case paddle::framework::proto::FLOATS:
return "float array"; return "float array";
case paddle::framework::STRINGS: case paddle::framework::proto::STRINGS:
return "string array"; return "string array";
case paddle::framework::BOOLEANS: case paddle::framework::proto::BOOLEANS:
return "bool array"; return "bool array";
case paddle::framework::BLOCK: case paddle::framework::proto::BLOCK:
return "block id"; return "block id";
} }
return "UNKNOWN"; // not possible return "UNKNOWN"; // not possible
} }
void PrintVar(const paddle::framework::OpProto::Var& v, std::stringstream& ss) { void PrintVar(const paddle::framework::proto::OpProto::Var& v,
std::stringstream& ss) {
ss << " { " ss << " { "
<< "\n" << "\n"
<< " \"name\" : \"" << Escape(v.name()) << "\",\n" << " \"name\" : \"" << Escape(v.name()) << "\",\n"
...@@ -65,7 +66,7 @@ void PrintVar(const paddle::framework::OpProto::Var& v, std::stringstream& ss) { ...@@ -65,7 +66,7 @@ void PrintVar(const paddle::framework::OpProto::Var& v, std::stringstream& ss) {
<< " },"; << " },";
} }
void PrintAttr(const paddle::framework::OpProto::Attr& a, void PrintAttr(const paddle::framework::proto::OpProto::Attr& a,
std::stringstream& ss) { std::stringstream& ss) {
ss << " { " ss << " { "
<< "\n" << "\n"
...@@ -81,7 +82,7 @@ void PrintOpProto(const std::string& type, ...@@ -81,7 +82,7 @@ void PrintOpProto(const std::string& type,
std::stringstream& ss) { std::stringstream& ss) {
std::cerr << "Processing " << type << "\n"; std::cerr << "Processing " << type << "\n";
const paddle::framework::OpProto* p = opinfo.proto_; const paddle::framework::proto::OpProto* p = opinfo.proto_;
if (p == nullptr) { if (p == nullptr) {
return; // It is possible that an operator doesn't have OpProto. return; // It is possible that an operator doesn't have OpProto.
} }
......
...@@ -144,7 +144,7 @@ void BindProgramDesc(py::module &m) { ...@@ -144,7 +144,7 @@ void BindProgramDesc(py::module &m) {
.def("serialize_to_string", SerializeMessage<ProgramDescBind>) .def("serialize_to_string", SerializeMessage<ProgramDescBind>)
.def("parse_from_string", .def("parse_from_string",
[](ProgramDescBind &program_desc, const std::string &data) { [](ProgramDescBind &program_desc, const std::string &data) {
ProgramDesc *desc = program_desc.Proto(); proto::ProgramDesc *desc = program_desc.Proto();
PADDLE_ENFORCE(desc->ParseFromString(data), PADDLE_ENFORCE(desc->ParseFromString(data),
"Fail to parse ProgramDesc from string. This could " "Fail to parse ProgramDesc from string. This could "
"be a bug of Paddle."); "be a bug of Paddle.");
...@@ -184,14 +184,14 @@ void BindBlockDesc(py::module &m) { ...@@ -184,14 +184,14 @@ void BindBlockDesc(py::module &m) {
} }
void BindVarDsec(py::module &m) { void BindVarDsec(py::module &m) {
py::enum_<DataType>(m, "DataType", "") py::enum_<proto::DataType>(m, "DataType", "")
.value("BOOL", DataType::BOOL) .value("BOOL", proto::DataType::BOOL)
.value("INT16", DataType::INT16) .value("INT16", proto::DataType::INT16)
.value("INT32", DataType::INT32) .value("INT32", proto::DataType::INT32)
.value("INT64", DataType::INT64) .value("INT64", proto::DataType::INT64)
.value("FP16", DataType::FP16) .value("FP16", proto::DataType::FP16)
.value("FP32", DataType::FP32) .value("FP32", proto::DataType::FP32)
.value("FP64", DataType::FP64); .value("FP64", proto::DataType::FP64);
py::class_<VarDescBind> var_desc(m, "VarDesc", ""); py::class_<VarDescBind> var_desc(m, "VarDesc", "");
var_desc var_desc
...@@ -213,27 +213,27 @@ void BindVarDsec(py::module &m) { ...@@ -213,27 +213,27 @@ void BindVarDsec(py::module &m) {
.def("persistable", &VarDescBind::Persistable) .def("persistable", &VarDescBind::Persistable)
.def("set_persistable", &VarDescBind::SetPersistable); .def("set_persistable", &VarDescBind::SetPersistable);
py::enum_<VarDesc::VarType>(var_desc, "VarType", "") py::enum_<proto::VarDesc::VarType>(var_desc, "VarType", "")
.value("LOD_TENSOR", VarDesc::LOD_TENSOR) .value("LOD_TENSOR", proto::VarDesc::LOD_TENSOR)
.value("SELECTED_ROWS", VarDesc::SELECTED_ROWS) .value("SELECTED_ROWS", proto::VarDesc::SELECTED_ROWS)
.value("FEED_MINIBATCH", VarDesc::FEED_MINIBATCH) .value("FEED_MINIBATCH", proto::VarDesc::FEED_MINIBATCH)
.value("FETCH_LIST", VarDesc::FETCH_LIST) .value("FETCH_LIST", proto::VarDesc::FETCH_LIST)
.value("STEP_SCOPES", VarDesc::STEP_SCOPES) .value("STEP_SCOPES", proto::VarDesc::STEP_SCOPES)
.value("LOD_RANK_TABLE", VarDesc::LOD_RANK_TABLE) .value("LOD_RANK_TABLE", proto::VarDesc::LOD_RANK_TABLE)
.value("LOD_TENSOR_ARRAY", VarDesc::LOD_TENSOR_ARRAY); .value("LOD_TENSOR_ARRAY", proto::VarDesc::LOD_TENSOR_ARRAY);
} }
void BindOpDesc(py::module &m) { void BindOpDesc(py::module &m) {
py::enum_<AttrType>(m, "AttrType", "") py::enum_<proto::AttrType>(m, "AttrType", "")
.value("INT", AttrType::INT) .value("INT", proto::AttrType::INT)
.value("INTS", AttrType::INTS) .value("INTS", proto::AttrType::INTS)
.value("FLOAT", AttrType::FLOAT) .value("FLOAT", proto::AttrType::FLOAT)
.value("FLOATS", AttrType::FLOATS) .value("FLOATS", proto::AttrType::FLOATS)
.value("STRING", AttrType::STRING) .value("STRING", proto::AttrType::STRING)
.value("STRINGS", AttrType::STRINGS) .value("STRINGS", proto::AttrType::STRINGS)
.value("BOOL", AttrType::BOOLEAN) .value("BOOL", proto::AttrType::BOOLEAN)
.value("BOOLS", AttrType::BOOLEANS) .value("BOOLS", proto::AttrType::BOOLEANS)
.value("BLOCK", AttrType::BLOCK); .value("BLOCK", proto::AttrType::BLOCK);
py::class_<OpDescBind> op_desc(m, "OpDesc", ""); py::class_<OpDescBind> op_desc(m, "OpDesc", "");
op_desc.def("type", &OpDescBind::Type) op_desc.def("type", &OpDescBind::Type)
......
...@@ -16,11 +16,11 @@ limitations under the License. */ ...@@ -16,11 +16,11 @@ limitations under the License. */
#include <mutex> // for call_once #include <mutex> // for call_once
#include <unordered_map> #include <unordered_map>
#include "gflags/gflags.h"
#include "paddle/framework/backward.h" #include "paddle/framework/backward.h"
#include "paddle/framework/executor.h" #include "paddle/framework/executor.h"
#include "paddle/framework/feed_fetch_method.h" #include "paddle/framework/feed_fetch_method.h"
#include "paddle/framework/framework.pb.h" #include "paddle/framework/framework.pb.h"
#include "paddle/framework/init.h"
#include "paddle/framework/lod_rank_table.h" #include "paddle/framework/lod_rank_table.h"
#include "paddle/framework/lod_tensor.h" #include "paddle/framework/lod_tensor.h"
#include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/lod_tensor_array.h"
...@@ -30,6 +30,7 @@ limitations under the License. */ ...@@ -30,6 +30,7 @@ limitations under the License. */
#include "paddle/operators/net_op.h" #include "paddle/operators/net_op.h"
#include "paddle/platform/enforce.h" #include "paddle/platform/enforce.h"
#include "paddle/platform/place.h" #include "paddle/platform/place.h"
#include "paddle/pybind/const_value.h"
#include "paddle/pybind/exception.h" #include "paddle/pybind/exception.h"
#include "paddle/pybind/pybind.h" #include "paddle/pybind/pybind.h"
#include "paddle/pybind/tensor_py.h" #include "paddle/pybind/tensor_py.h"
...@@ -51,24 +52,6 @@ static size_t UniqueIntegerGenerator(const std::string &prefix) { ...@@ -51,24 +52,6 @@ static size_t UniqueIntegerGenerator(const std::string &prefix) {
return generators[prefix].fetch_add(1); return generators[prefix].fetch_add(1);
} }
std::once_flag gflags_init_flag;
// TODO(qijun) move init gflags to init.cc
void InitGflags(std::vector<std::string> &argv) {
std::call_once(gflags_init_flag, [&]() {
int argc = argv.size();
char **arr = new char *[argv.size()];
std::string line;
for (size_t i = 0; i < argv.size(); i++) {
arr[i] = &argv[i][0];
line += argv[i];
line += ' ';
}
google::ParseCommandLineFlags(&argc, &arr, true);
VLOG(1) << "Init commandline: " << line;
});
}
bool IsCompileGPU() { bool IsCompileGPU() {
#ifndef PADDLE_WITH_CUDA #ifndef PADDLE_WITH_CUDA
return false; return false;
...@@ -305,12 +288,12 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -305,12 +288,12 @@ All parameter, weight, gradient are variables in Paddle.
for (const auto &t : targets) { for (const auto &t : targets) {
prog_with_targets.MutableBlock(t[0])->Op(t[1])->MarkAsTarget(); prog_with_targets.MutableBlock(t[0])->Op(t[1])->MarkAsTarget();
} }
ProgramDesc pruned_desc; proto::ProgramDesc pruned_desc;
Prune(*prog_with_targets.Proto(), &pruned_desc); Prune(*prog_with_targets.Proto(), &pruned_desc);
return new ProgramDescBind(pruned_desc); return new ProgramDescBind(pruned_desc);
}); });
m.def("inference_optimize", [](ProgramDescBind &origin) { m.def("inference_optimize", [](ProgramDescBind &origin) {
ProgramDesc pruned_desc; proto::ProgramDesc pruned_desc;
InferenceOptimize(*(origin.Proto()), &pruned_desc); InferenceOptimize(*(origin.Proto()), &pruned_desc);
return new ProgramDescBind(pruned_desc); return new ProgramDescBind(pruned_desc);
}); });
...@@ -362,7 +345,7 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -362,7 +345,7 @@ All parameter, weight, gradient are variables in Paddle.
py::class_<OperatorBase>(m, "Operator") py::class_<OperatorBase>(m, "Operator")
.def_static("create", .def_static("create",
[](py::bytes protobin) { [](py::bytes protobin) {
OpDesc desc; proto::OpDesc desc;
PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
"Cannot parse user input to OpDesc"); "Cannot parse user input to OpDesc");
PADDLE_ENFORCE(desc.IsInitialized(), PADDLE_ENFORCE(desc.IsInitialized(),
...@@ -415,7 +398,7 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -415,7 +398,7 @@ All parameter, weight, gradient are variables in Paddle.
py::class_<operators::CondOp, OperatorBase>(m, "CondOp") py::class_<operators::CondOp, OperatorBase>(m, "CondOp")
.def_static("create", .def_static("create",
[](py::bytes protobin) -> operators::CondOp * { [](py::bytes protobin) -> operators::CondOp * {
OpDesc desc; proto::OpDesc desc;
PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
"Cannot parse user input to OpDesc"); "Cannot parse user input to OpDesc");
PADDLE_ENFORCE(desc.IsInitialized(), PADDLE_ENFORCE(desc.IsInitialized(),
...@@ -438,7 +421,8 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -438,7 +421,8 @@ All parameter, weight, gradient are variables in Paddle.
.def("run", &Executor::Run); .def("run", &Executor::Run);
m.def("unique_integer", UniqueIntegerGenerator); m.def("unique_integer", UniqueIntegerGenerator);
m.def("init_gflags", InitGflags); m.def("init_gflags", framework::InitGflags);
m.def("init_devices", &framework::InitDevices);
m.def("is_compile_gpu", IsCompileGPU); m.def("is_compile_gpu", IsCompileGPU);
m.def("set_feed_variable", framework::SetFeedVariable); m.def("set_feed_variable", framework::SetFeedVariable);
...@@ -448,6 +432,7 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -448,6 +432,7 @@ All parameter, weight, gradient are variables in Paddle.
BindBlockDesc(m); BindBlockDesc(m);
BindVarDsec(m); BindVarDsec(m);
BindOpDesc(m); BindOpDesc(m);
BindConstValue(m);
py::class_<framework::LoDRankTable>(m, "LodRankTable") py::class_<framework::LoDRankTable>(m, "LodRankTable")
.def("items", [](framework::LoDRankTable &table) { .def("items", [](framework::LoDRankTable &table) {
......
...@@ -14,9 +14,8 @@ make -j `nproc` print_operators_doc ...@@ -14,9 +14,8 @@ make -j `nproc` print_operators_doc
paddle/pybind/print_operators_doc > doc/en/html/operators.json paddle/pybind/print_operators_doc > doc/en/html/operators.json
# check websites for broken links # check websites for broken links
# It will be failed now! linkchecker doc/en/html/index.html
#linkchecker doc/en/html/index.html linkchecker doc/cn/html/index.html
#linkchecker doc/cn/html/index.html
# Parse Github URL # Parse Github URL
REPO=`git config remote.origin.url` REPO=`git config remote.origin.url`
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
build build
dist dist
paddle.egg-info paddle.egg-info
paddlepaddle_gpu.egg-info
.idea .idea
paddle/proto/*.py paddle/proto/*.py
paddle/proto/*.pyc paddle/proto/*.pyc
...@@ -25,10 +25,10 @@ from paddle.trainer.config_parser import * ...@@ -25,10 +25,10 @@ from paddle.trainer.config_parser import *
__all__ = [ __all__ = [
'sequence_conv_pool', 'simple_lstm', "simple_img_conv_pool", 'sequence_conv_pool', 'simple_lstm', "simple_img_conv_pool",
"img_conv_bn_pool", 'lstmemory_group', 'lstmemory_unit', 'small_vgg', "img_conv_bn_pool", 'lstmemory_group', 'lstmemory_unit', 'small_vgg',
'img_conv_group', 'vgg_16_network', 'gru_unit', 'gru_group', 'simple_gru', 'img_conv_group', 'img_separable_conv', 'vgg_16_network', 'gru_unit',
'simple_attention', 'dot_product_attention', 'multi_head_attention', 'gru_group', 'simple_gru', 'simple_attention', 'dot_product_attention',
'simple_gru2', 'bidirectional_gru', 'text_conv_pool', 'bidirectional_lstm', 'multi_head_attention', 'simple_gru2', 'bidirectional_gru',
'inputs', 'outputs' 'text_conv_pool', 'bidirectional_lstm', 'inputs', 'outputs'
] ]
###################################################### ######################################################
...@@ -251,13 +251,13 @@ def img_conv_bn_pool(input, ...@@ -251,13 +251,13 @@ def img_conv_bn_pool(input,
pool_layer_attr=None): pool_layer_attr=None):
""" """
Convolution, batch normalization, pooling group. Convolution, batch normalization, pooling group.
Img input => Conv => BN => Pooling => Output. Img input => Conv => BN => Pooling => Output.
:param name: group name. :param name: group name.
:type name: basestring :type name: basestring
:param input: input layer. :param input: input layer.
:type input: LayerOutput :type input: LayerOutput
:param filter_size: see img_conv_layer for details. :param filter_size: see img_conv_layer for details.
:type filter_size: int :type filter_size: int
:param num_filters: see img_conv_layer for details. :param num_filters: see img_conv_layer for details.
...@@ -435,6 +435,85 @@ def img_conv_group(input, ...@@ -435,6 +435,85 @@ def img_conv_group(input,
input=tmp, stride=pool_stride, pool_size=pool_size, pool_type=pool_type) input=tmp, stride=pool_stride, pool_size=pool_size, pool_type=pool_type)
@wrap_name_default("separable_conv")
def img_separable_conv(input,
num_channels,
num_out_channels,
filter_size,
stride=1,
padding=0,
depth_multiplier=1,
act=None,
bias_attr=None,
param_attr=None,
shared_bias=True,
layer_type='exconv',
name=None):
"""
Separable Convolution.
The separable convolution module consists of a depthwise convolution
that acts separately on input channels, followed by a pointwise convolution
with 1x1 kernels that mixes the channels. It is used in Xception:
https://arxiv.org/pdf/1610.02357.pdf
:param input: input layer.
:type input: LayerOutput
:param num_channels: the number of input channels.
:type num_channels: int
:param num_out_channels: the number of output channels.
:type num_out_channels: int
:param filter_size: the filter size for the depthwise convolution.
:type filter_size: int|tuple
:param stride: the stride size for the depthwise convolution.
:type stride: int|tuple
:param padding: the padding size for the depthwise convolution.
:type padding: int|tuple
:param depth_multiplier: the number of filters per input channel in the
depthwise convolution.
:type depth_multiplier: int
:param act: the activation function for the output.
:type act: BaseActivation
:param bias_attr: see img_conv_layer for details.
:type bias_attr: ParameterAttribute
:param param_attr: see img_conv_layer for details.
:type param_attr: ParameterAttribute
:param shared_bias: see img_conv_layer for details.
:type shared_bias: bool
:param layer_type: see img_conv_layer for details.
:type layer_type: basestring
:return: layer's output
:rtype: LayerOutput
"""
__depthwise_conv__ = img_conv_layer(
name="%s_depthwise_conv" % name,
input=input,
num_channels=num_channels,
num_filters=num_channels * depth_multiplier,
groups=num_channels,
filter_size=filter_size,
stride=stride,
padding=padding,
act=LinearActivation(),
bias_attr=bias_attr,
param_attr=param_attr,
shared_biases=shared_bias,
layer_type=layer_type)
__pointwise_conv__ = img_conv_layer(
name="%s_pointwise_conv" % name,
input=__depthwise_conv__,
num_channels=num_channels * depth_multiplier,
num_filters=num_out_channels,
filter_size=1,
stride=1,
padding=0,
act=act,
bias_attr=bias_attr,
param_attr=param_attr,
shared_biases=shared_bias)
return __pointwise_conv__
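A minimal usage sketch for the new helper (an editorial example; the data layer and sizes are ours): the depthwise stage runs 32 groups over a 32-channel input, and the pointwise 1x1 stage mixes them into 64 output channels.

# Editorial config snippet, not from this commit.
from paddle.trainer_config_helpers import *

img = data_layer(name='image', size=32 * 32 * 32)  # 32 channels of 32x32
sep = img_separable_conv(
    input=img,
    num_channels=32,
    num_out_channels=64,
    filter_size=3,
    stride=1,
    padding=1,
    depth_multiplier=1,
    act=ReluActivation())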
def small_vgg(input_image, num_channels, num_classes): def small_vgg(input_image, num_channels, num_classes):
def __vgg__(ipt, num_filter, times, dropouts, num_channels_=None): def __vgg__(ipt, num_filter, times, dropouts, num_channels_=None):
return img_conv_group( return img_conv_group(
...@@ -648,7 +727,7 @@ def lstmemory_unit(input, ...@@ -648,7 +727,7 @@ def lstmemory_unit(input,
lstm_bias_attr=None, lstm_bias_attr=None,
lstm_layer_attr=None): lstm_layer_attr=None):
""" """
lstmemory_unit defines the calculation process of an LSTM unit during a lstmemory_unit defines the calculation process of an LSTM unit during a
single time step. This function is not a recurrent layer, so it can not be single time step. This function is not a recurrent layer, so it can not be
directly used to process sequence input. This function is always used in directly used to process sequence input. This function is always used in
recurrent_group (see layers.py for more details) to implement attention recurrent_group (see layers.py for more details) to implement attention
...@@ -869,7 +948,7 @@ def gru_unit(input, ...@@ -869,7 +948,7 @@ def gru_unit(input,
gru_layer_attr=None, gru_layer_attr=None,
naive=False): naive=False):
""" """
gru_unit defines the calculation process of a gated recurrent unit during a single gru_unit defines the calculation process of a gated recurrent unit during a single
time step. This function is not a recurrent layer, so it can not be time step. This function is not a recurrent layer, so it can not be
directly used to process sequence input. This function is always used in directly used to process sequence input. This function is always used in
the recurrent_group (see layers.py for more details) to implement attention the recurrent_group (see layers.py for more details) to implement attention
...@@ -1012,7 +1091,7 @@ def simple_gru(input, ...@@ -1012,7 +1091,7 @@ def simple_gru(input,
simple_gru in network.py. The reason why there are so many interfaces is simple_gru in network.py. The reason why there are so many interfaces is
that we have two ways to implement recurrent neural network. One way is to that we have two ways to implement recurrent neural network. One way is to
use one complete layer to implement rnn (including simple rnn, gru and lstm) use one complete layer to implement rnn (including simple rnn, gru and lstm)
with multiple time steps, such as recurrent_layer, lstmemory, grumemory. But with multiple time steps, such as recurrent_layer, lstmemory, grumemory. But
the multiplication operation :math:`W x_t` is not computed in these layers. the multiplication operation :math:`W x_t` is not computed in these layers.
See details in their interfaces in layers.py. See details in their interfaces in layers.py.
The other implementation is to use a recurrent group which can assemble a The other implementation is to use a recurrent group which can assemble a
...@@ -1116,11 +1195,12 @@ def simple_gru2(input, ...@@ -1116,11 +1195,12 @@ def simple_gru2(input,
:type act: BaseActivation :type act: BaseActivation
:param gate_act: gate activation type of gru :param gate_act: gate activation type of gru
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param gru_bias_attr: bias parameter attribute of gru layer, :param gru_bias_attr: bias parameter attribute of gru layer,
False means no bias, None means default bias. False means no bias, None means default bias.
:type gru_bias_attr: ParameterAttribute|False|None :type gru_bias_attr: ParameterAttribute|False|None
:param gru_layer_attr: Extra attribute of the gru layer. :param gru_param_attr: weight parameter attribute of the gru layer,
:type gru_layer_attr: ExtraLayerAttribute None means default parameter.
:type gru_param_attr: ParameterAttribute|None
:return: the gru group. :return: the gru group.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -1188,7 +1268,7 @@ def bidirectional_gru(input, ...@@ -1188,7 +1268,7 @@ def bidirectional_gru(input,
:type size: int :type size: int
:param return_seq: If set False, the last time step of output are :param return_seq: If set False, the last time step of output are
concatenated and returned. concatenated and returned.
If set True, the entire output sequences in forward If set True, the entire output sequences in forward
and backward directions are concatenated and returned. and backward directions are concatenated and returned.
:type return_seq: bool :type return_seq: bool
:return: LayerOutput object. :return: LayerOutput object.
...@@ -1277,7 +1357,7 @@ def bidirectional_lstm(input, ...@@ -1277,7 +1357,7 @@ def bidirectional_lstm(input,
:type size: int :type size: int
:param return_seq: If set False, the last time step of output are :param return_seq: If set False, the last time step of output are
concatenated and returned. concatenated and returned.
If set True, the entire output sequences in forward If set True, the entire output sequences in forward
and backward directions are concatenated and returned. and backward directions are concatenated and returned.
:type return_seq: bool :type return_seq: bool
:return: LayerOutput object. :return: LayerOutput object.
......
...@@ -16,12 +16,13 @@ import regularizer ...@@ -16,12 +16,13 @@ import regularizer
from param_attr import ParamAttr from param_attr import ParamAttr
from data_feeder import DataFeeder from data_feeder import DataFeeder
from core import LoDTensor, CPUPlace, GPUPlace from core import LoDTensor, CPUPlace, GPUPlace
import clip
Tensor = LoDTensor Tensor = LoDTensor
__all__ = framework.__all__ + executor.__all__ + [ __all__ = framework.__all__ + executor.__all__ + [
'io', 'initializer', 'layers', 'nets', 'optimizer', 'backward', 'io', 'initializer', 'layers', 'nets', 'optimizer', 'backward',
'regularizer', 'LoDTensor', 'CPUPlace', 'GPUPlace', 'Tensor', 'ParamAttr' 'regularizer', 'LoDTensor', 'CPUPlace', 'GPUPlace', 'Tensor', 'ParamAttr'
'DataFeeder' 'DataFeeder', 'clip'
] ]
......
import functools
import layers
__all__ = ['GradientClipByValue', 'append_gradient_clip_ops']
class BaseGradientClipAttr(object):
def process_context(self, context, p_g):
raise NotImplementedError()
def create_operators(self, param, grad):
raise NotImplementedError()
class NullGradientClipAttr(BaseGradientClipAttr):
def process_context(self, context, p_g):
pass
def create_operators(self, param, grad):
return param, grad
class GradientClipByValue(BaseGradientClipAttr):
def __init__(self, max, min=None):
max = float(max)
if min is None:
min = -max
else:
min = float(min)
self.max = max
self.min = min
def process_context(self, context, p_g):
pass
def create_operators(self, param, grad):
new_grad = layers.clip(x=grad, min=self.min, max=self.max)
return param, new_grad
def append_gradient_clip_ops(param_grad):
context = dict()
create_op_callbacks = []
for p, g in param_grad:
clip_attr = getattr(p, 'clip_attr', NullGradientClipAttr())
if clip_attr is None:
clip_attr = NullGradientClipAttr()
if not isinstance(clip_attr, BaseGradientClipAttr):
raise TypeError(
"clip attribute should be an instance of BaseGradientClippingAttr"
)
clip_attr.process_context(context=context, p_g=param_grad)
create_op_callbacks.append(
functools.partial(
clip_attr.create_operators, param=p, grad=g))
return [each_callback() for each_callback in create_op_callbacks]
ClipByValue = GradientClipByValue
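A hedged usage sketch (editorial; `w` and `w_grad` stand for a parameter and its gradient variable from a built program): attach a clip attribute to the parameter, then let append_gradient_clip_ops rewrite the gradient before the optimizer consumes it.

# Editorial sketch of the intended call pattern.
w.clip_attr = GradientClipByValue(max=1.0)  # clips gradients into [-1.0, 1.0]
clipped_param_grads = append_gradient_clip_ops([(w, w_grad)])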
import numpy as np import numpy as np
import layers import layers
from framework import Program, unique_name, Variable from framework import Program, unique_name, Variable, program_guard
from layer_helper import LayerHelper from layer_helper import LayerHelper
__all__ = ['Accuracy'] __all__ = ['Accuracy', 'ChunkEvaluator']
def _clone_var_(block, var): def _clone_var_(block, var):
...@@ -49,15 +49,12 @@ class Evaluator(object): ...@@ -49,15 +49,12 @@ class Evaluator(object):
if reset_program is None: if reset_program is None:
reset_program = Program() reset_program = Program()
for var in self.states: with program_guard(main_program=reset_program):
assert isinstance(var, Variable) for var in self.states:
g_var = _clone_var_(reset_program.current_block(), var) assert isinstance(var, Variable)
layers.fill_constant( g_var = _clone_var_(reset_program.current_block(), var)
shape=g_var.shape, layers.fill_constant(
value=0.0, shape=g_var.shape, value=0.0, dtype=g_var.dtype, out=g_var)
dtype=g_var.dtype,
out=g_var,
main_program=reset_program)
executor.run(reset_program) executor.run(reset_program)
...@@ -104,20 +101,14 @@ class Accuracy(Evaluator): ...@@ -104,20 +101,14 @@ class Accuracy(Evaluator):
self.total = self.create_state(dtype='int64', shape=[1], suffix='total') self.total = self.create_state(dtype='int64', shape=[1], suffix='total')
self.correct = self.create_state( self.correct = self.create_state(
dtype='int64', shape=[1], suffix='correct') dtype='int64', shape=[1], suffix='correct')
kwargs = {'main_program': main_program}
total = self.helper.create_tmp_variable(dtype='int') total = self.helper.create_tmp_variable(dtype='int')
correct = self.helper.create_tmp_variable(dtype='int') correct = self.helper.create_tmp_variable(dtype='int')
acc = layers.accuracy( acc = layers.accuracy(
input=input, input=input, label=label, k=k, total=total, correct=correct)
label=label, total = layers.cast(x=total, dtype='int64')
k=k, correct = layers.cast(x=correct, dtype='int64')
total=total, layers.sums(input=[self.total, total], out=self.total)
correct=correct, layers.sums(input=[self.correct, correct], out=self.correct)
**kwargs)
total = layers.cast(x=total, dtype='int64', **kwargs)
correct = layers.cast(x=correct, dtype='int64', **kwargs)
layers.sums(input=[self.total, total], out=self.total, **kwargs)
layers.sums(input=[self.correct, correct], out=self.correct, **kwargs)
self.metrics.append(acc) self.metrics.append(acc)
...@@ -125,10 +116,75 @@ class Accuracy(Evaluator): ...@@ -125,10 +116,75 @@ class Accuracy(Evaluator):
if eval_program is None: if eval_program is None:
eval_program = Program() eval_program = Program()
block = eval_program.current_block() block = eval_program.current_block()
kwargs = {'main_program': eval_program} with program_guard(main_program=eval_program):
total = _clone_var_(block, self.total) total = _clone_var_(block, self.total)
correct = _clone_var_(block, self.correct) correct = _clone_var_(block, self.correct)
total = layers.cast(total, dtype='float32', **kwargs) total = layers.cast(total, dtype='float32')
correct = layers.cast(correct, dtype='float32', **kwargs) correct = layers.cast(correct, dtype='float32')
out = layers.elementwise_div(x=correct, y=total, **kwargs) out = layers.elementwise_div(x=correct, y=total)
return np.array(executor.run(eval_program, fetch_list=[out])[0]) return np.array(executor.run(eval_program, fetch_list=[out])[0])
class ChunkEvaluator(Evaluator):
"""
Accumulate counter numbers output by chunk_eval from mini-batches and
compute the precision, recall and F1-score using the accumulated counter
numbers.
"""
def __init__(
self,
input,
label,
chunk_scheme,
num_chunk_types,
excluded_chunk_types=None, ):
super(ChunkEvaluator, self).__init__("chunk_eval")
main_program = self.helper.main_program
if main_program.current_block().idx != 0:
raise ValueError("You can only invoke Evaluator in root block")
self.num_infer_chunks = self.create_state(
dtype='int64', shape=[1], suffix='num_infer_chunks')
self.num_label_chunks = self.create_state(
dtype='int64', shape=[1], suffix='num_label_chunks')
self.num_correct_chunks = self.create_state(
dtype='int64', shape=[1], suffix='num_correct_chunks')
precision, recall, f1_score, num_infer_chunks, num_label_chunks, num_correct_chunks = layers.chunk_eval(
input=input,
label=label,
chunk_scheme=chunk_scheme,
num_chunk_types=num_chunk_types,
excluded_chunk_types=excluded_chunk_types, )
layers.sums(
input=[self.num_infer_chunks, num_infer_chunks],
out=self.num_infer_chunks)
layers.sums(
input=[self.num_label_chunks, num_label_chunks],
out=self.num_label_chunks)
layers.sums(
input=[self.num_correct_chunks, num_correct_chunks],
out=self.num_correct_chunks)
self.metrics.extend([precision, recall, f1_score])
def eval(self, executor, eval_program=None):
if eval_program is None:
eval_program = Program()
block = eval_program.current_block()
num_infer_chunks, num_label_chunks, num_correct_chunks = executor.run(
eval_program,
fetch_list=[_clone_var_(block, state) for state in self.states])
num_infer_chunks = num_infer_chunks[0]
num_label_chunks = num_label_chunks[0]
num_correct_chunks = num_correct_chunks[0]
precision = float(
num_correct_chunks) / num_infer_chunks if num_infer_chunks else 0
recall = float(
num_correct_chunks) / num_label_chunks if num_label_chunks else 0
f1_score = float(2 * precision * recall) / (
precision + recall) if num_correct_chunks else 0
return np.array(
[precision], dtype='float32'), np.array(
[recall], dtype='float32'), np.array(
[f1_score], dtype='float32')
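A condensed usage sketch of the new evaluator, following the label_semantic_roles change later in this commit (crf_decode, target, exe, feeder and train_data stand in for the real variables there; num_chunk_types is task-specific):

.. code-block:: python

    chunk_evaluator = fluid.evaluator.ChunkEvaluator(
        input=crf_decode,
        label=target,
        chunk_scheme="IOB",
        num_chunk_types=num_chunk_types)

    chunk_evaluator.reset(exe)
    for data in train_data():
        # fetching the metrics runs chunk_eval and accumulates the counters
        exe.run(fluid.default_main_program(),
                feed=feeder.feed(data),
                fetch_list=[avg_cost] + chunk_evaluator.metrics)
    precision, recall, f1_score = chunk_evaluator.eval(exe)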
...@@ -46,6 +46,13 @@ class Executor(object): ...@@ -46,6 +46,13 @@ class Executor(object):
p.set_place(each) p.set_place(each)
act_places.append(p) act_places.append(p)
# TODO(dzhwinter) : consider that our fluid tests all written in
# GPUPlace(gpu_id), this will be changed in next PR.
if core.is_compile_gpu():
core.init_devices(["CPU", "GPU:0"])
else:
core.init_devices(["CPU"])
self.executor = core.Executor(act_places) self.executor = core.Executor(act_places)
self.places = places self.places = places
......
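A minimal construction sketch under this change; device initialization now happens inside Executor.__init__ depending on the build:

.. code-block:: python

    import paddle.v2.fluid as fluid

    place = fluid.CPUPlace()      # or fluid.GPUPlace(0) on a CUDA build
    exe = fluid.Executor(place)   # triggers core.init_devices(...) above
    exe.run(fluid.default_startup_program())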
import collections import collections
import contextlib
import numpy as np import numpy as np
from . import core
import proto.framework_pb2 as framework_pb2 import proto.framework_pb2 as framework_pb2
import google.protobuf.message from . import core
import contextlib
__all__ = [ __all__ = [
'Block', 'Variable', 'Program', 'Operator', 'default_startup_program', 'Block', 'Variable', 'Program', 'Operator', 'default_startup_program',
...@@ -12,6 +12,18 @@ __all__ = [ ...@@ -12,6 +12,18 @@ __all__ = [
'switch_main_program' 'switch_main_program'
] ]
EMPTY_VAR_NAME = core.kEmptyVarName()
TEMP_VAR_NAME = core.kTempVarName()
GRAD_VAR_SUFFIX = core.kGradVarSuffix()
ZERO_VAR_SUFFIX = core.kZeroVarSuffix()
def grad_var_name(var_name):
"""
Return the gradient variable name for a given variable name.
"""
return var_name + GRAD_VAR_SUFFIX
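For example, for an arbitrary parameter name, kGradVarSuffix() is the "@GRAD" literal that the removed op_test helper later in this commit used to hard-code:

.. code-block:: python

    >>> grad_var_name("fc_0.w_0")
    'fc_0.w_0@GRAD'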
def unique_name(prefix): def unique_name(prefix):
""" """
...@@ -704,6 +716,7 @@ class Block(object): ...@@ -704,6 +716,7 @@ class Block(object):
trainable=p.trainable, trainable=p.trainable,
optimize_attr=p.optimize_attr, optimize_attr=p.optimize_attr,
regularizer=p.regularizer, regularizer=p.regularizer,
clip_attr=p.clip_attr,
name=v.name) name=v.name)
self.vars[new_p.name] = new_p self.vars[new_p.name] = new_p
...@@ -866,6 +879,8 @@ class Parameter(Variable): ...@@ -866,6 +879,8 @@ class Parameter(Variable):
self.regularizer = kwargs.get('regularizer', None) self.regularizer = kwargs.get('regularizer', None)
self.clip_attr = kwargs.get('clip_attr', None)
# program is a global instance. # program is a global instance.
_main_program_ = Program() _main_program_ = Program()
......
...@@ -21,19 +21,11 @@ class LayerHelper(object): ...@@ -21,19 +21,11 @@ class LayerHelper(object):
@property @property
def main_program(self): def main_program(self):
prog = self.kwargs.get('main_program', None) return default_main_program()
if prog is None:
return default_main_program()
else:
return prog
@property @property
def startup_program(self): def startup_program(self):
prog = self.kwargs.get('startup_program', None) return default_startup_program()
if prog is None:
return default_startup_program()
else:
return prog
def append_op(self, *args, **kwargs): def append_op(self, *args, **kwargs):
return self.main_program.current_block().append_op(*args, **kwargs) return self.main_program.current_block().append_op(*args, **kwargs)
...@@ -151,13 +143,6 @@ class LayerHelper(object): ...@@ -151,13 +143,6 @@ class LayerHelper(object):
persistable=True, persistable=True,
initializer=initializer) initializer=initializer)
@property
def to_kwargs(self):
return {
'main_program': self.main_program,
'startup_program': self.startup_program
}
def append_bias_op(self, input_var, dim_start=1, dim_end=None): def append_bias_op(self, input_var, dim_start=1, dim_end=None):
""" """
Append bias operator and return its output. If the user does not set Append bias operator and return its output. If the user does not set
......
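With the main_program/startup_program keyword arguments removed throughout, code that targets a non-default program now wraps layer construction in program_guard instead; a sketch (prog and startup are illustrative Program instances):

.. code-block:: python

    import paddle.v2.fluid as fluid
    from paddle.v2.fluid.framework import Program, program_guard

    prog, startup = Program(), Program()
    with program_guard(prog, startup):
        # LayerHelper.main_program/startup_program now resolve to
        # prog/startup via default_main_program()/default_startup_program().
        x = fluid.layers.data(name='x', shape=[13], dtype='float32')
        y = fluid.layers.fc(input=x, size=1)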
...@@ -14,11 +14,7 @@ __all__ = [ ...@@ -14,11 +14,7 @@ __all__ = [
] ]
def split_lod_tensor(input, def split_lod_tensor(input, mask, level=0):
mask,
level=0,
main_program=None,
startup_program=None):
helper = LayerHelper('split_lod_tensor', **locals()) helper = LayerHelper('split_lod_tensor', **locals())
out_true = helper.create_tmp_variable(dtype=input.dtype) out_true = helper.create_tmp_variable(dtype=input.dtype)
out_false = helper.create_tmp_variable(dtype=input.dtype) out_false = helper.create_tmp_variable(dtype=input.dtype)
...@@ -34,13 +30,7 @@ def split_lod_tensor(input, ...@@ -34,13 +30,7 @@ def split_lod_tensor(input,
return out_true, out_false return out_true, out_false
def merge_lod_tensor(in_true, def merge_lod_tensor(in_true, in_false, x, mask, level=0):
in_false,
x,
mask,
level=0,
main_program=None,
startup_program=None):
helper = LayerHelper('merge_lod_tensor', **locals()) helper = LayerHelper('merge_lod_tensor', **locals())
out = helper.create_tmp_variable(dtype=in_true.dtype) out = helper.create_tmp_variable(dtype=in_true.dtype)
helper.append_op( helper.append_op(
...@@ -135,9 +125,8 @@ class StaticRNN(object): ...@@ -135,9 +125,8 @@ class StaticRNN(object):
IN_RNN_BLOCK = 1 IN_RNN_BLOCK = 1
AFTER_RNN_BLOCK = 2 AFTER_RNN_BLOCK = 2
def __init__(self, name=None, main_program=None): def __init__(self, name=None):
self.helper = LayerHelper( self.helper = LayerHelper("static_rnn", name=name)
"static_rnn", name=name, main_program=main_program)
self.memories = {} # memory map, from pre_mem.name --> MemoryLink self.memories = {} # memory map, from pre_mem.name --> MemoryLink
self.inputs = [] # input variable list in current block self.inputs = [] # input variable list in current block
self.outputs = [] # output variable list in parent block self.outputs = [] # output variable list in parent block
...@@ -354,8 +343,8 @@ class While(object): ...@@ -354,8 +343,8 @@ class While(object):
IN_WHILE_BLOCK = 1 IN_WHILE_BLOCK = 1
AFTER_WHILE_BLOCK = 2 AFTER_WHILE_BLOCK = 2
def __init__(self, cond, name=None, main_program=None): def __init__(self, cond, name=None):
self.helper = LayerHelper("while", name=name, main_program=main_program) self.helper = LayerHelper("while", name=name)
self.status = While.BEFORE_WHILE_BLOCK self.status = While.BEFORE_WHILE_BLOCK
if not isinstance(cond, Variable): if not isinstance(cond, Variable):
raise TypeError("condition should be a variable") raise TypeError("condition should be a variable")
...@@ -406,7 +395,7 @@ class While(object): ...@@ -406,7 +395,7 @@ class While(object):
attrs={'sub_block': while_block}) attrs={'sub_block': while_block})
def lod_rank_table(x, level=0, main_program=None): def lod_rank_table(x, level=0):
""" """
This function creates an operator for creating a LOD_RANK_TABLE This function creates an operator for creating a LOD_RANK_TABLE
using the input x. using the input x.
...@@ -423,7 +412,7 @@ def lod_rank_table(x, level=0, main_program=None): ...@@ -423,7 +412,7 @@ def lod_rank_table(x, level=0, main_program=None):
return table return table
def max_sequence_len(rank_table, main_program=None): def max_sequence_len(rank_table):
""" """
This function creates an operator to calculate the length of This function creates an operator to calculate the length of
max sequence through input rank_table(should be a lod_rank_table) max sequence through input rank_table(should be a lod_rank_table)
...@@ -437,7 +426,7 @@ def max_sequence_len(rank_table, main_program=None): ...@@ -437,7 +426,7 @@ def max_sequence_len(rank_table, main_program=None):
return res return res
def topk(input, k, main_program=None, startup_program=None): def topk(input, k):
helper = LayerHelper('topk', **locals()) helper = LayerHelper('topk', **locals())
topk_out = helper.create_tmp_variable(dtype=input.data_type) topk_out = helper.create_tmp_variable(dtype=input.data_type)
topk_indices = helper.create_tmp_variable(dtype='int64') topk_indices = helper.create_tmp_variable(dtype='int64')
...@@ -450,7 +439,7 @@ def topk(input, k, main_program=None, startup_program=None): ...@@ -450,7 +439,7 @@ def topk(input, k, main_program=None, startup_program=None):
return topk_out, topk_indices return topk_out, topk_indices
def lod_tensor_to_array(x, table, main_program=None): def lod_tensor_to_array(x, table):
""" """
This function creates an operator to convert an LOD_Tensor to This function creates an operator to convert an LOD_Tensor to
an array. an array.
...@@ -468,7 +457,7 @@ def lod_tensor_to_array(x, table, main_program=None): ...@@ -468,7 +457,7 @@ def lod_tensor_to_array(x, table, main_program=None):
return array return array
def array_to_lod_tensor(x, table, main_program=None, startup_program=None): def array_to_lod_tensor(x, table):
""" """
This function creates an operator to convert an array to a This function creates an operator to convert an array to a
LOD_Tensor. LOD_Tensor.
...@@ -483,11 +472,7 @@ def array_to_lod_tensor(x, table, main_program=None, startup_program=None): ...@@ -483,11 +472,7 @@ def array_to_lod_tensor(x, table, main_program=None, startup_program=None):
return tmp return tmp
def increment(x, def increment(x, value=1.0, in_place=True):
value=1.0,
in_place=True,
main_program=None,
startup_program=None):
""" """
This function creates an operator to increment each value in the input This function creates an operator to increment each value in the input
`x` by an amount: `value` as mentioned in the input parameter. This `x` by an amount: `value` as mentioned in the input parameter. This
...@@ -506,7 +491,7 @@ def increment(x, ...@@ -506,7 +491,7 @@ def increment(x,
return out return out
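A sketch of the usual counter pattern with the trimmed signature (in_place defaults to True):

.. code-block:: python

    counter = fluid.layers.zeros(shape=[1], dtype='float32')
    fluid.layers.increment(x=counter, value=1.0, in_place=True)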
def array_write(x, i, array=None, main_program=None, startup_program=None): def array_write(x, i, array=None):
""" """
This function creates an operator to write the data out as a This function creates an operator to write the data out as a
LOD_TENSOR_ARRAY. LOD_TENSOR_ARRAY.
...@@ -525,7 +510,7 @@ def array_write(x, i, array=None, main_program=None, startup_program=None): ...@@ -525,7 +510,7 @@ def array_write(x, i, array=None, main_program=None, startup_program=None):
return array return array
def create_array(dtype, main_program=None): def create_array(dtype):
helper = LayerHelper("array", **locals()) helper = LayerHelper("array", **locals())
return helper.create_variable( return helper.create_variable(
name="{0}.out".format(helper.name), name="{0}.out".format(helper.name),
...@@ -533,7 +518,25 @@ def create_array(dtype, main_program=None): ...@@ -533,7 +518,25 @@ def create_array(dtype, main_program=None):
dtype=dtype) dtype=dtype)
def less_than(x, y, cond=None, main_program=None, **ignored): def less_than(x, y, cond=None, **ignored):
"""
**Less than**
This layer returns the truth value of :math:`x < y` elementwise.
Args:
x(Variable): First operand of *less_than*
y(Variable): Second operand of *less_than*
cond(Variable|None): Optional output variable to store the result of *less_than*
Returns:
Variable: The tensor variable storing the output of *less_than*.
Examples:
.. code-block:: python
less = fluid.layers.less_than(x=label, y=limit)
"""
helper = LayerHelper("less_than", **locals()) helper = LayerHelper("less_than", **locals())
if cond is None: if cond is None:
cond = helper.create_tmp_variable(dtype='bool') cond = helper.create_tmp_variable(dtype='bool')
...@@ -545,7 +548,7 @@ def less_than(x, y, cond=None, main_program=None, **ignored): ...@@ -545,7 +548,7 @@ def less_than(x, y, cond=None, main_program=None, **ignored):
return cond return cond
def array_read(array, i, main_program=None, startup_program=None): def array_read(array, i):
""" """
This function creates an operator to read the data in as a This function creates an operator to read the data in as a
LOD_TENSOR_ARRAY. LOD_TENSOR_ARRAY.
...@@ -564,7 +567,7 @@ def array_read(array, i, main_program=None, startup_program=None): ...@@ -564,7 +567,7 @@ def array_read(array, i, main_program=None, startup_program=None):
return out return out
def shrink_memory(x, i, table, main_program=None, startup_program=None): def shrink_memory(x, i, table):
""" """
This function creates an operator to shrink_rnn_memory using the RankTable This function creates an operator to shrink_rnn_memory using the RankTable
as mentioned in the input parameter. as mentioned in the input parameter.
...@@ -581,7 +584,7 @@ def shrink_memory(x, i, table, main_program=None, startup_program=None): ...@@ -581,7 +584,7 @@ def shrink_memory(x, i, table, main_program=None, startup_program=None):
return out return out
def array_length(array, main_program=None): def array_length(array):
""" """
This function creates an operator to find the length of the This function creates an operator to find the length of the
LOD_TENSOR_ARRAY. LOD_TENSOR_ARRAY.
...@@ -611,20 +614,12 @@ class ConditionalBlockGuard(BlockGuard): ...@@ -611,20 +614,12 @@ class ConditionalBlockGuard(BlockGuard):
class ConditionalBlock(object): class ConditionalBlock(object):
def __init__(self, def __init__(self, inputs, name=None):
inputs,
name=None,
main_program=None,
startup_program=None):
for each_input in inputs: for each_input in inputs:
if not isinstance(each_input, Variable): if not isinstance(each_input, Variable):
raise TypeError("Each input should be variable") raise TypeError("Each input should be variable")
self.inputs = inputs self.inputs = inputs
self.helper = LayerHelper( self.helper = LayerHelper('conditional_block', name=name)
'conditional_block',
name=name,
main_program=main_program,
startup_program=startup_program)
def block(self): def block(self):
return ConditionalBlockGuard(self) return ConditionalBlockGuard(self)
...@@ -709,15 +704,10 @@ class IfElse(object): ...@@ -709,15 +704,10 @@ class IfElse(object):
IN_IF_ELSE_TRUE_BLOCKS = 1 IN_IF_ELSE_TRUE_BLOCKS = 1
IN_IF_ELSE_FALSE_BLOCKS = 2 IN_IF_ELSE_FALSE_BLOCKS = 2
def __init__(self, cond, name=None, main_program=None, def __init__(self, cond, name=None):
startup_program=None):
if not isinstance(cond, Variable): if not isinstance(cond, Variable):
raise TypeError("cond must be a Variable") raise TypeError("cond must be a Variable")
self.helper = LayerHelper( self.helper = LayerHelper('ifelse', name=name)
'ifelse',
name=name,
main_program=main_program,
startup_program=startup_program)
self.cond = cond self.cond = cond
self.input_table = {} self.input_table = {}
self.status = IfElse.OUT_IF_ELSE_BLOCKS self.status = IfElse.OUT_IF_ELSE_BLOCKS
...@@ -782,11 +772,7 @@ class IfElse(object): ...@@ -782,11 +772,7 @@ class IfElse(object):
out_table.append(outside_out) out_table.append(outside_out)
# assign local var to outside # assign local var to outside
assign( assign(input=each_out, output=outside_out)
input=each_out,
output=outside_out,
main_program=self.helper.main_program,
startup_program=self.helper.startup_program)
def __call__(self): def __call__(self):
if self.status != self.OUT_IF_ELSE_BLOCKS: if self.status != self.OUT_IF_ELSE_BLOCKS:
...@@ -810,9 +796,7 @@ class IfElse(object): ...@@ -810,9 +796,7 @@ class IfElse(object):
in_false=false_var, in_false=false_var,
mask=self.cond, mask=self.cond,
x=self.cond, x=self.cond,
level=0, level=0))
main_program=self.helper.main_program,
startup_program=self.helper.startup_program))
return rlist return rlist
...@@ -821,12 +805,8 @@ class DynamicRNN(object): ...@@ -821,12 +805,8 @@ class DynamicRNN(object):
IN_RNN = 1 IN_RNN = 1
AFTER_RNN = 2 AFTER_RNN = 2
def __init__(self, name=None, main_program=None, startup_program=None): def __init__(self, name=None):
self.helper = LayerHelper( self.helper = LayerHelper('dynamic_rnn', name=name)
'dynamic_rnn',
name=name,
main_program=main_program,
startup_program=startup_program)
self.status = DynamicRNN.BEFORE_RNN self.status = DynamicRNN.BEFORE_RNN
self.lod_rank_table = None self.lod_rank_table = None
self.max_seq_len = None self.max_seq_len = None
...@@ -880,8 +860,7 @@ class DynamicRNN(object): ...@@ -880,8 +860,7 @@ class DynamicRNN(object):
inputs={'X': x, inputs={'X': x,
'RankTable': self.lod_rank_table}, 'RankTable': self.lod_rank_table},
outputs={'Out': input_array}) outputs={'Out': input_array})
return array_read( return array_read(array=input_array, i=self.step_idx)
array=input_array, i=self.step_idx, **self.helper.to_kwargs)
@contextlib.contextmanager @contextlib.contextmanager
def block(self): def block(self):
...@@ -892,32 +871,18 @@ class DynamicRNN(object): ...@@ -892,32 +871,18 @@ class DynamicRNN(object):
self.status = DynamicRNN.IN_RNN self.status = DynamicRNN.IN_RNN
with self.while_op.block(): with self.while_op.block():
yield yield
increment( increment(x=self.step_idx, value=1.0, in_place=True)
x=self.step_idx,
value=1.0,
in_place=True,
**self.helper.to_kwargs)
for new_mem, mem_array in self.mem_link: for new_mem, mem_array in self.mem_link:
array_write( array_write(x=new_mem, i=self.step_idx, array=mem_array)
x=new_mem,
i=self.step_idx, less_than(x=self.step_idx, y=self.max_seq_len, cond=self.cond)
array=mem_array,
**self.helper.to_kwargs)
less_than(
x=self.step_idx,
y=self.max_seq_len,
cond=self.cond,
**self.helper.to_kwargs)
self.status = DynamicRNN.AFTER_RNN self.status = DynamicRNN.AFTER_RNN
for each_array in self.output_array: for each_array in self.output_array:
self.outputs.append( self.outputs.append(
array_to_lod_tensor( array_to_lod_tensor(
x=each_array, x=each_array, table=self.lod_rank_table))
table=self.lod_rank_table,
**self.helper.to_kwargs))
def __call__(self, *args, **kwargs): def __call__(self, *args, **kwargs):
if self.status != DynamicRNN.AFTER_RNN: if self.status != DynamicRNN.AFTER_RNN:
...@@ -944,13 +909,9 @@ class DynamicRNN(object): ...@@ -944,13 +909,9 @@ class DynamicRNN(object):
inputs={'X': init, inputs={'X': init,
'I': self.zero_idx}, 'I': self.zero_idx},
outputs={'Out': mem_array}) outputs={'Out': mem_array})
retv = array_read( retv = array_read(array=mem_array, i=self.step_idx)
array=mem_array, i=self.step_idx, **self.helper.to_kwargs)
retv = shrink_memory( retv = shrink_memory(
x=retv, x=retv, i=self.step_idx, table=self.lod_rank_table)
i=self.step_idx,
table=self.lod_rank_table,
**self.helper.to_kwargs)
self.mem_dict[retv.name] = mem_array self.mem_dict[retv.name] = mem_array
return retv return retv
else: else:
......
...@@ -10,8 +10,6 @@ def data(name, ...@@ -10,8 +10,6 @@ def data(name,
dtype='float32', dtype='float32',
lod_level=0, lod_level=0,
type=core.VarDesc.VarType.LOD_TENSOR, type=core.VarDesc.VarType.LOD_TENSOR,
main_program=None,
startup_program=None,
stop_gradient=True): stop_gradient=True):
""" """
Data Layer. Data Layer.
......
...@@ -5,12 +5,15 @@ All layers just related to the neural network. ...@@ -5,12 +5,15 @@ All layers just related to the neural network.
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant from ..initializer import Normal, Constant
from ..framework import Variable from ..framework import Variable
from ..param_attr import ParamAttr
from tensor import concat
__all__ = [ __all__ = [
'fc', 'embedding', 'dynamic_lstm', 'gru_unit', 'linear_chain_crf', 'fc', 'embedding', 'dynamic_lstm', 'gru_unit', 'linear_chain_crf',
'crf_decoding', 'cos_sim', 'cross_entropy', 'square_error_cost', 'accuracy', 'crf_decoding', 'cos_sim', 'cross_entropy', 'square_error_cost', 'accuracy',
'chunk_eval', 'sequence_conv', 'conv2d', 'sequence_pool', 'pool2d', 'chunk_eval', 'sequence_conv', 'conv2d', 'sequence_pool', 'pool2d',
'batch_norm', 'beam_search_decode', 'conv2d_transpose' 'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand',
'lstm_unit', 'reduce_sum'
] ]
...@@ -20,36 +23,50 @@ def fc(input, ...@@ -20,36 +23,50 @@ def fc(input,
param_attr=None, param_attr=None,
bias_attr=None, bias_attr=None,
act=None, act=None,
name=None, name=None):
main_program=None,
startup_program=None):
""" """
Fully Connected Layer. **Fully Connected Layer**
This layer accepts multiple inputs and applies a linear transformation to each input.
If activation type is provided, the corresponding activation function is applied to the
output of the linear transformation. For each input :math:`X`, the equation is:
.. math::
Out = Act(WX + b)
In the above equation:
* :math:`X`: Input value, a tensor with rank at least 2.
* :math:`W`: Weight, a 2-D tensor with shape [M, N].
* :math:`b`: Bias, a 2-D tensor with shape [M, 1].
* :math:`Act`: Activation function.
* :math:`Out`: Output value, with the same shape as :math:`X`.
All the input variables are passed in as local variables to the LayerHelper
constructor.
Args: Args:
input: The input tensor to the function input(Variable|list): Input tensors. Each tensor has a rank of at least 2
size: The size of the layer size(int): Output size
num_flatten_dims: Number of columns in input num_flatten_dims(int): Number of columns in input
param_attr: The parameters/weights to the FC Layer param_attr(ParamAttr|list): The parameters/weights to the FC Layer
param_initializer: Initializer used for the weight/parameter. If None, XavierInitializer() is used bias_attr(ParamAttr|list): Bias parameter for the FC layer
bias_attr: The bias parameter for the FC layer act(str): Activation type
bias_initializer: Initializer used for the bias. If None, then ConstantInitializer() is used name(str): Name/alias of the function
act: Activation to be applied to the output of FC layer
name: Name/alias of the function
main_program: Name of the main program that calls this
startup_program: Name of the startup program
This function can take in multiple inputs and performs the Fully Connected
function (linear transformation) on top of each of them.
So for input x, the output will be : Wx + b. Where W is the parameter,
b the bias and x is the input.
The function also applies an activation (non-linearity) on top of the
output, if activation is passed in the input.
All the input variables of this function are passed in as local variables
to the LayerHelper constructor.
Returns:
Variable: The tensor variable storing the transformation and \
non-linearity activation result.
Raises:
ValueError: If rank of input tensor is less than 2.
Examples:
.. code-block:: python
data = fluid.layers.data(name='data', shape=[32, 32], dtype='float32')
fc = fluid.layers.fc(input=data, size=1000, act="tanh")
""" """
helper = LayerHelper('fc', **locals()) helper = LayerHelper('fc', **locals())
...@@ -88,33 +105,32 @@ def fc(input, ...@@ -88,33 +105,32 @@ def fc(input,
return helper.append_activation(pre_activation) return helper.append_activation(pre_activation)
def embedding(input, def embedding(input, size, is_sparse=False, param_attr=None, dtype='float32'):
size,
is_sparse=False,
param_attr=None,
dtype='float32',
main_program=None,
startup_program=None):
""" """
Embedding Layer. **Embedding Layer**
This layer is used to lookup a vector of IDs, provided by *input*, in a lookup table.
The result of this lookup is the embedding of each ID in the *input*.
All the input variables are passed in as local variables to the LayerHelper
constructor.
Args: Args:
param_initializer: input(Variable): Input to the function
input: The input to the function size(int): Output size
size: The size of the layer is_sparse(bool): Boolean flag that specifying whether the input is sparse
is_sparse: A flag that declares whether the input is sparse is_sparse(bool): Boolean flag specifying whether the input is sparse
param_attr: Parameters for this layer dtype(np.dtype|core.DataType|str): The type of data : float32, float_16, int etc
dtype: The type of data : float32, float_16, int etc
main_program: Name of the main program that calls this
startup_program: Name of the startup program
This function can take in the input (which is a vector of IDs) and Returns:
performs a lookup in the lookup_table using these IDs, to result into Variable: The tensor variable storing the embeddings of the \
the embedding of each ID in the input. supplied inputs.
All the input variables of this function are passed in as local variables Examples:
to the LayerHelper constructor. .. code-block:: python
data = fluid.layers.data(name='ids', shape=[32, 32], dtype='float32')
fc = fluid.layers.embedding(input=data, size=16)
""" """
helper = LayerHelper('embedding', **locals()) helper = LayerHelper('embedding', **locals())
...@@ -140,9 +156,7 @@ def dynamic_lstm(input, ...@@ -140,9 +156,7 @@ def dynamic_lstm(input,
gate_activation='sigmoid', gate_activation='sigmoid',
cell_activation='tanh', cell_activation='tanh',
candidate_activation='tanh', candidate_activation='tanh',
dtype='float32', dtype='float32'):
main_program=None,
startup_program=None):
helper = LayerHelper('lstm', **locals()) helper = LayerHelper('lstm', **locals())
size = size / 4 size = size / 4
weight = helper.create_parameter( weight = helper.create_parameter(
...@@ -185,9 +199,7 @@ def gru_unit(input, ...@@ -185,9 +199,7 @@ def gru_unit(input,
weight=None, weight=None,
bias=None, bias=None,
activation='tanh', activation='tanh',
gate_activation='sigmoid', gate_activation='sigmoid'):
main_program=None,
startup_program=None):
""" """
GRUUnit Operator implements partial calculations of the GRU unit as following: GRUUnit Operator implements partial calculations of the GRU unit as following:
...@@ -250,11 +262,7 @@ def gru_unit(input, ...@@ -250,11 +262,7 @@ def gru_unit(input,
return updated_hidden, reset_hidden_pre, gate return updated_hidden, reset_hidden_pre, gate
def linear_chain_crf(input, def linear_chain_crf(input, label, param_attr=None):
label,
param_attr=None,
main_program=None,
startup_program=None):
helper = LayerHelper('linear_chain_crf', **locals()) helper = LayerHelper('linear_chain_crf', **locals())
size = input.shape[1] size = input.shape[1]
transition = helper.create_parameter( transition = helper.create_parameter(
...@@ -280,11 +288,7 @@ def linear_chain_crf(input, ...@@ -280,11 +288,7 @@ def linear_chain_crf(input,
return log_likelihood return log_likelihood
def crf_decoding(input, def crf_decoding(input, param_attr, label=None):
param_attr,
label=None,
main_program=None,
startup_program=None):
helper = LayerHelper('crf_decoding', **locals()) helper = LayerHelper('crf_decoding', **locals())
transition = helper.get_parameter(param_attr.name) transition = helper.get_parameter(param_attr.name)
viterbi_path = helper.create_tmp_variable(dtype=helper.input_dtype()) viterbi_path = helper.create_tmp_variable(dtype=helper.input_dtype())
...@@ -392,8 +396,8 @@ def chunk_eval(input, ...@@ -392,8 +396,8 @@ def chunk_eval(input,
excluded_chunk_types=None, excluded_chunk_types=None,
**kwargs): **kwargs):
""" """
This function computes the accuracy using the input and label. This function computes and outputs the precision, recall and
The output is the top_k inputs and their indices. F1-score of chunk detection.
""" """
helper = LayerHelper("chunk_eval", **kwargs) helper = LayerHelper("chunk_eval", **kwargs)
...@@ -401,6 +405,9 @@ def chunk_eval(input, ...@@ -401,6 +405,9 @@ def chunk_eval(input,
precision = helper.create_tmp_variable(dtype="float32") precision = helper.create_tmp_variable(dtype="float32")
recall = helper.create_tmp_variable(dtype="float32") recall = helper.create_tmp_variable(dtype="float32")
f1_score = helper.create_tmp_variable(dtype="float32") f1_score = helper.create_tmp_variable(dtype="float32")
num_infer_chunks = helper.create_tmp_variable(dtype="int64")
num_label_chunks = helper.create_tmp_variable(dtype="int64")
num_correct_chunks = helper.create_tmp_variable(dtype="int64")
helper.append_op( helper.append_op(
type="chunk_eval", type="chunk_eval",
...@@ -409,14 +416,17 @@ def chunk_eval(input, ...@@ -409,14 +416,17 @@ def chunk_eval(input,
outputs={ outputs={
"Precision": [precision], "Precision": [precision],
"Recall": [recall], "Recall": [recall],
"F1-Score": [f1_score] "F1-Score": [f1_score],
"NumInferChunks": [num_infer_chunks],
"NumLabelChunks": [num_label_chunks],
"NumCorrectChunks": [num_correct_chunks]
}, },
attrs={ attrs={
"num_chunk_types": num_chunk_types, "num_chunk_types": num_chunk_types,
'chunk_scheme': chunk_scheme, "chunk_scheme": chunk_scheme,
'excluded_chunk_types': excluded_chunk_types or [] "excluded_chunk_types": excluded_chunk_types or []
}) })
return precision, recall, f1_score return precision, recall, f1_score, num_infer_chunks, num_label_chunks, num_correct_chunks
def sequence_conv(input, def sequence_conv(input,
...@@ -426,9 +436,7 @@ def sequence_conv(input, ...@@ -426,9 +436,7 @@ def sequence_conv(input,
padding=None, padding=None,
bias_attr=None, bias_attr=None,
param_attr=None, param_attr=None,
act=None, act=None):
main_program=None,
startup_program=None):
""" """
This function creates the op for sequence_conv, using the inputs and This function creates the op for sequence_conv, using the inputs and
other convolutional configurations for the filters and stride as given other convolutional configurations for the filters and stride as given
...@@ -471,9 +479,7 @@ def conv2d(input, ...@@ -471,9 +479,7 @@ def conv2d(input,
param_attr=None, param_attr=None,
bias_attr=None, bias_attr=None,
act=None, act=None,
name=None, name=None):
main_program=None,
startup_program=None):
""" """
This function creates the op for a 2-dimensional Convolution. This function creates the op for a 2-dimensional Convolution.
This is performed using the parameters of filters(size, dimensionality etc) This is performed using the parameters of filters(size, dimensionality etc)
...@@ -559,9 +565,7 @@ def pool2d(input, ...@@ -559,9 +565,7 @@ def pool2d(input,
pool_type, pool_type,
pool_stride=None, pool_stride=None,
pool_padding=None, pool_padding=None,
global_pooling=False, global_pooling=False):
main_program=None,
startup_program=None):
""" """
This function adds the operator for pooling in 2 dimensions, using the This function adds the operator for pooling in 2 dimensions, using the
pooling configurations mentioned in input parameters. pooling configurations mentioned in input parameters.
...@@ -607,9 +611,7 @@ def batch_norm(input, ...@@ -607,9 +611,7 @@ def batch_norm(input,
epsilon=1e-05, epsilon=1e-05,
param_attr=None, param_attr=None,
bias_attr=None, bias_attr=None,
data_layout='NCHW', data_layout='NCHW'):
main_program=None,
startup_program=None):
""" """
This function helps create an operator to implement This function helps create an operator to implement
the BatchNorm layer using the configurations from the input parameters. the BatchNorm layer using the configurations from the input parameters.
...@@ -679,7 +681,7 @@ def batch_norm(input, ...@@ -679,7 +681,7 @@ def batch_norm(input,
return helper.append_activation(batch_norm_out) return helper.append_activation(batch_norm_out)
def beam_search_decode(ids, scores, main_program=None, startup_program=None): def beam_search_decode(ids, scores):
helper = LayerHelper('beam_search_decode', **locals()) helper = LayerHelper('beam_search_decode', **locals())
sentence_ids = helper.create_tmp_variable(dtype=ids.dtype) sentence_ids = helper.create_tmp_variable(dtype=ids.dtype)
sentence_scores = helper.create_tmp_variable(dtype=ids.dtype) sentence_scores = helper.create_tmp_variable(dtype=ids.dtype)
...@@ -703,9 +705,7 @@ def conv2d_transpose(input, ...@@ -703,9 +705,7 @@ def conv2d_transpose(input,
padding=None, padding=None,
stride=None, stride=None,
dilation=None, dilation=None,
param_attr=None, param_attr=None):
main_program=None,
startup_program=None):
""" """
The transpose of conv2d layer. The transpose of conv2d layer.
...@@ -795,3 +795,220 @@ def conv2d_transpose(input, ...@@ -795,3 +795,220 @@ def conv2d_transpose(input,
attrs=op_attr) attrs=op_attr)
return out return out
def sequence_expand(x, y):
"""Sequence Expand Layer. This layer will expand the input variable **x**
according to LoD information of **y**. And the following examples will
explain how sequence_expand works:
.. code-block:: text
* Case 1
x is a LoDTensor:
x.lod = [[0, 2, 3],
[0, 1, 3, 4]]
x.data = [a, b, c, d]
x.dims = [4, 1]
y is a LoDTensor:
y.lod = [[0, 2, 4],
[0, 3, 6, 7, 8]]
with condition len(y.lod[-1]) - 1 == x.dims[0]
then output is a 2-level LoDTensor:
out.lod = [[0, 2, 4],
[0, 3, 6, 7, 8]]
out.data = [a, a, a, b, b, b, c, d]
out.dims = [8, 1]
* Case 2
x is a Tensor:
x.data = [a, b, c]
x.dims = [3, 1]
y is a LoDTensor:
y.lod = [[0, 2, 3, 6]]
with condition len(y.lod[-1]) - 1 == x.dims[0]
then output is a 1-level LoDTensor:
out.lod = [[0, 2, 3, 6]]
out.data = [a, a, b, c, c, c]
out.dims = [6, 1]
Args:
x (Variable): The input variable which is a Tensor or LoDTensor.
y (Variable): The input variable which is a LoDTensor.
Returns:
Variable: The expanded variable which is a LoDTensor.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[10], dtype='float32')
y = fluid.layers.data(name='y', shape=[10, 20],
dtype='float32', lod_level=1)
out = layers.sequence_expand(x=x, y=y)
"""
helper = LayerHelper('sequence_expand', input=x, **locals())
dtype = helper.input_dtype()
tmp = helper.create_tmp_variable(dtype)
helper.append_op(
type='sequence_expand', inputs={'X': x,
'Y': y}, outputs={'Out': tmp})
return tmp
def lstm_unit(x_t,
hidden_t_prev,
cell_t_prev,
forget_bias=0.0,
param_attr=None,
bias_attr=None):
"""Lstm unit layer. The equation of a lstm step is:
.. math::
i_t & = \sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i)
f_t & = \sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f)
c_t & = f_t c_{t-1} + i_t \tanh (W_{x_c}x_{t} + W_{h_c}h_{t-1} + b_c)
o_t & = \sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o)
h_t & = o_t \tanh(c_t)
The inputs of lstm unit include :math:`x_t`, :math:`h_{t-1}` and
:math:`c_{t-1}`. The implementation separates the linear transformation
from the non-linear transformation. Here, we take :math:`i_t` as an
example. The linear transformation is applied by calling a `fc` layer and
the equation is:
.. math::
L_{i_t} = W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i
The non-linear transformation is applied by calling `lstm_unit_op` and the
equation is:
.. math::
i_t = \sigma(L_{i_t})
This layer has two outputs including :math:`h_t` and :math:`o_t`.
Args:
x_t (Variable): The input value of current step.
hidden_t_prev (Variable): The hidden value of lstm unit.
cell_t_prev (Variable): The cell value of lstm unit.
forget_bias (float): The forget bias of lstm unit.
param_attr (ParamAttr): The attributes of parameter weights, used to set
initializer, name etc.
bias_attr (ParamAttr): The attributes of bias weights, if not False,
bias weights will be created and set to the default value.
Returns:
tuple: The hidden value and cell value of lstm unit.
Raises:
ValueError: The ranks of **x_t**, **hidden_t_prev** and **cell_t_prev**\
are not 2, or the 1st dimensions of **x_t**, **hidden_t_prev** \
and **cell_t_prev** are not the same.
Examples:
.. code-block:: python
x_t = fluid.layers.fc(input=x_t_data, size=10)
prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=20)
prev_cell = fluid.layers.fc(input=prev_cell_data, size=30)
hidden_value, cell_value = fluid.layers.lstm_unit(x_t=x_t,
hidden_t_prev=prev_hidden,
cell_t_prev=prev_cell)
"""
helper = LayerHelper('lstm_unit', **locals())
if len(x_t.shape) != 2:
raise ValueError("Rank of x_t must be 2.")
if len(hidden_t_prev.shape) != 2:
raise ValueError("Rank of hidden_t_prev must be 2.")
if len(cell_t_prev.shape) != 2:
raise ValueError("Rank of cell_t_prev must be 2.")
if x_t.shape[0] != hidden_t_prev.shape[0] or x_t.shape[
0] != cell_t_prev.shape[0]:
raise ValueError("The 1s dimension of x_t, hidden_t_prev and "
"cell_t_prev must be the same.")
if bias_attr is None:
bias_attr = ParamAttr()
size = cell_t_prev.shape[1]
concat_out = concat(input=[x_t, hidden_t_prev], axis=1)
fc_out = fc(input=concat_out,
size=4 * size,
param_attr=param_attr,
bias_attr=bias_attr)
dtype = x_t.dtype
c = helper.create_tmp_variable(dtype)
h = helper.create_tmp_variable(dtype)
helper.append_op(
type='lstm_unit',
inputs={"X": fc_out,
"C_prev": cell_t_prev},
outputs={"C": c,
"H": h},
attrs={"forget_bias": forget_bias})
return h, c
def reduce_sum(input, dim=None, keep_dim=False):
"""
Computes the sum of tensor elements over the given dimension.
Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
dim (int|None): The dimension along which the sum is performed. If
:attr:`None`, sum all elements of :attr:`input` and return a
Tensor variable with a single element, otherwise must be in the
range :math:`[-rank(input), rank(input))`. If :math:`dim < 0`,
the dimension to reduce is :math:`rank + dim`.
keep_dim (bool): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true.
Returns:
Variable: The reduced Tensor variable.
Examples:
.. code-block:: python
# x is a Tensor variable with following elements:
# [[0.2, 0.3, 0.5, 0.9]
# [0.1, 0.2, 0.6, 0.7]]
# Each example is followed by the corresponding output tensor.
fluid.layers.reduce_sum(x) # [3.5]
fluid.layers.reduce_sum(x, dim=0) # [0.3, 0.5, 1.1, 1.6]
fluid.layers.reduce_sum(x, dim=-1) # [1.9, 1.6]
fluid.layers.reduce_sum(x, dim=1, keep_dim=True) # [[1.9], [1.6]]
"""
helper = LayerHelper('reduce_sum', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype())
helper.append_op(
type='reduce_sum',
inputs={'X': input},
outputs={'Out': out},
attrs={
'dim': dim if dim is not None else 0,
'keep_dim': keep_dim,
'reduce_all': True if dim is None else False
})
return out
...@@ -2,7 +2,7 @@ from ..registry import register_layer ...@@ -2,7 +2,7 @@ from ..registry import register_layer
__all__ = [ __all__ = [
'mean', 'mul', 'dropout', 'reshape', 'sigmoid', 'scale', 'transpose', 'mean', 'mul', 'dropout', 'reshape', 'sigmoid', 'scale', 'transpose',
'sigmoid_cross_entropy_with_logits', 'elementwise_add', 'elementwise_div', 'sigmoid_cross_entropy_with_logits', 'elementwise_add', 'elementwise_div',
'elementwise_sub', 'elementwise_mul', 'clip', 'abs' 'elementwise_sub', 'elementwise_mul', 'clip', 'abs', 'sequence_softmax'
] ]
for _OP in set(__all__): for _OP in set(__all__):
......
...@@ -6,12 +6,12 @@ __all__ = [ ...@@ -6,12 +6,12 @@ __all__ = [
] ]
def create_tensor(dtype, name=None, main_program=None, startup_program=None): def create_tensor(dtype, name=None):
helper = LayerHelper("create_tensor", **locals()) helper = LayerHelper("create_tensor", **locals())
return helper.create_variable(name=helper.name, dtype=dtype) return helper.create_variable(name=helper.name, dtype=dtype)
def cast(x, dtype, main_program=None): def cast(x, dtype):
""" """
This function takes in the input with input_dtype This function takes in the input with input_dtype
and casts it to the output_dtype as the output. and casts it to the output_dtype as the output.
...@@ -27,7 +27,7 @@ def cast(x, dtype, main_program=None): ...@@ -27,7 +27,7 @@ def cast(x, dtype, main_program=None):
return out return out
def concat(input, axis, main_program=None, startup_program=None): def concat(input, axis):
""" """
This function concats the input along the axis mentioned This function concats the input along the axis mentioned
and returns that as the output. and returns that as the output.
...@@ -42,7 +42,7 @@ def concat(input, axis, main_program=None, startup_program=None): ...@@ -42,7 +42,7 @@ def concat(input, axis, main_program=None, startup_program=None):
return out return out
def sums(input, out=None, main_program=None, startup_program=None): def sums(input, out=None):
""" """
This function takes in the input and performs the sum operation on it This function takes in the input and performs the sum operation on it
and returns that as the output. and returns that as the output.
...@@ -54,7 +54,7 @@ def sums(input, out=None, main_program=None, startup_program=None): ...@@ -54,7 +54,7 @@ def sums(input, out=None, main_program=None, startup_program=None):
return out return out
def assign(input, output, main_program=None, startup_program=None): def assign(input, output):
helper = LayerHelper('assign', **locals()) helper = LayerHelper('assign', **locals())
helper.append_op( helper.append_op(
type='scale', type='scale',
...@@ -64,16 +64,28 @@ def assign(input, output, main_program=None, startup_program=None): ...@@ -64,16 +64,28 @@ def assign(input, output, main_program=None, startup_program=None):
return output return output
def fill_constant(shape, def fill_constant(shape, dtype, value, out=None):
dtype,
value,
out=None,
main_program=None,
startup_program=None):
""" """
This function creates a tensor , with shape as mentioned in the input and **fill_constant**
specified dtype and fills this up with a constant value that
comes in the input. It also sets the stop_gradient to be True. This function creates a tensor of specified *shape* and
*dtype*, and initializes this with a constant supplied in *value*.
It also sets *stop_gradient* to True.
Args:
shape(tuple|list|None): Shape of output tensor
dtype(np.dtype|core.DataType|str): Data type of output tensor
value(float): Constant value to initialize the output tensor
out(Variable): Output Variable to initialize
Returns:
Variable: The tensor variable storing the output
Examples:
.. code-block:: python
data = fluid.layers.fill_constant(shape=[1], value=0, dtype='int64')
""" """
helper = LayerHelper("fill_constant", **locals()) helper = LayerHelper("fill_constant", **locals())
if out is None: if out is None:
...@@ -94,9 +106,32 @@ def fill_constant_batch_size_like(input, ...@@ -94,9 +106,32 @@ def fill_constant_batch_size_like(input,
dtype, dtype,
value, value,
input_dim_idx=0, input_dim_idx=0,
output_dim_idx=0, output_dim_idx=0):
main_program=None, """
startup_program=None): **fill_constant_batch_size_like**
This function creates a tensor of specified *shape*, *dtype* and batch size,
and initializes this with a constant supplied in *value*. The batch size is
obtained from the `input` tensor.
It also sets *stop_gradient* to True.
Args:
input(Variable): Tensor whose dimensions will be used to get batch size
shape(tuple|list|None): Shape of output tensor
dtype(np.dtype|core.DataType|str): Data type of output tensor
value(float): Constant value to initialize the output tensor
input_dim_idx(int): Index of input's batch size dimension
output_dim_idx(int): Index of output's batch size dimension
Returns:
Variable: The tensor variable storing the output
Examples:
.. code-block:: python
data = fluid.layers.fill_constant_batch_size_like(
input=like, shape=[1], value=0, dtype='int64')
"""
helper = LayerHelper("fill_constant_batch_size_like", **locals()) helper = LayerHelper("fill_constant_batch_size_like", **locals())
out = helper.create_tmp_variable(dtype=dtype) out = helper.create_tmp_variable(dtype=dtype)
helper.append_op( helper.append_op(
...@@ -114,7 +149,7 @@ def fill_constant_batch_size_like(input, ...@@ -114,7 +149,7 @@ def fill_constant_batch_size_like(input,
return out return out
def ones(shape, dtype, main_program=None): def ones(shape, dtype):
""" """
This function performs the same function as fill_constant() declared above This function performs the same function as fill_constant() declared above
with the constant value being 1.0. with the constant value being 1.0.
...@@ -122,7 +157,7 @@ def ones(shape, dtype, main_program=None): ...@@ -122,7 +157,7 @@ def ones(shape, dtype, main_program=None):
return fill_constant(value=1.0, **locals()) return fill_constant(value=1.0, **locals())
def zeros(shape, dtype, main_program=None): def zeros(shape, dtype):
""" """
This function performs the same function as fill_constant() declared above This function performs the same function as fill_constant() declared above
with the constant value being 0.0. with the constant value being 0.0.
......
...@@ -10,25 +10,19 @@ def simple_img_conv_pool(input, ...@@ -10,25 +10,19 @@ def simple_img_conv_pool(input,
pool_stride, pool_stride,
act, act,
param_attr=None, param_attr=None,
pool_type='max', pool_type='max'):
main_program=None,
startup_program=None):
conv_out = layers.conv2d( conv_out = layers.conv2d(
input=input, input=input,
num_filters=num_filters, num_filters=num_filters,
filter_size=filter_size, filter_size=filter_size,
param_attr=param_attr, param_attr=param_attr,
act=act, act=act)
main_program=main_program,
startup_program=startup_program)
pool_out = layers.pool2d( pool_out = layers.pool2d(
input=conv_out, input=conv_out,
pool_size=pool_size, pool_size=pool_size,
pool_type=pool_type, pool_type=pool_type,
pool_stride=pool_stride, pool_stride=pool_stride)
main_program=main_program,
startup_program=startup_program)
return pool_out return pool_out
...@@ -42,9 +36,7 @@ def img_conv_group(input, ...@@ -42,9 +36,7 @@ def img_conv_group(input,
conv_with_batchnorm=False, conv_with_batchnorm=False,
conv_batchnorm_drop_rate=None, conv_batchnorm_drop_rate=None,
pool_stride=1, pool_stride=1,
pool_type=None, pool_type=None):
main_program=None,
startup_program=None):
""" """
Image Convolution Group, Used for vgg net. Image Convolution Group, Used for vgg net.
""" """
...@@ -75,31 +67,19 @@ def img_conv_group(input, ...@@ -75,31 +67,19 @@ def img_conv_group(input,
filter_size=conv_filter_size[i], filter_size=conv_filter_size[i],
padding=conv_padding[i], padding=conv_padding[i],
param_attr=param_attr[i], param_attr=param_attr[i],
act=local_conv_act, act=local_conv_act)
main_program=main_program,
startup_program=startup_program)
if conv_with_batchnorm[i]: if conv_with_batchnorm[i]:
tmp = layers.batch_norm( tmp = layers.batch_norm(input=tmp, act=conv_act)
input=tmp,
act=conv_act,
main_program=main_program,
startup_program=startup_program)
drop_rate = conv_batchnorm_drop_rate[i] drop_rate = conv_batchnorm_drop_rate[i]
if abs(drop_rate) > 1e-5: if abs(drop_rate) > 1e-5:
tmp = layers.dropout( tmp = layers.dropout(x=tmp, dropout_prob=drop_rate)
x=tmp,
dropout_prob=drop_rate,
main_program=main_program,
startup_program=startup_program)
pool_out = layers.pool2d( pool_out = layers.pool2d(
input=tmp, input=tmp,
pool_size=pool_size, pool_size=pool_size,
pool_type=pool_type, pool_type=pool_type,
pool_stride=pool_stride, pool_stride=pool_stride)
main_program=main_program,
startup_program=startup_program)
return pool_out return pool_out
...@@ -108,21 +88,13 @@ def sequence_conv_pool(input, ...@@ -108,21 +88,13 @@ def sequence_conv_pool(input,
filter_size, filter_size,
param_attr=None, param_attr=None,
act="sigmoid", act="sigmoid",
pool_type="max", pool_type="max"):
main_program=None,
startup_program=None):
conv_out = layers.sequence_conv( conv_out = layers.sequence_conv(
input=input, input=input,
num_filters=num_filters, num_filters=num_filters,
filter_size=filter_size, filter_size=filter_size,
param_attr=param_attr, param_attr=param_attr,
act=act, act=act)
main_program=main_program,
startup_program=startup_program)
pool_out = layers.sequence_pool( pool_out = layers.sequence_pool(input=conv_out, pool_type=pool_type)
input=conv_out,
pool_type=pool_type,
main_program=main_program,
startup_program=startup_program)
return pool_out return pool_out
...@@ -2,10 +2,11 @@ from collections import defaultdict ...@@ -2,10 +2,11 @@ from collections import defaultdict
import framework import framework
from backward import append_backward_ops from backward import append_backward_ops
from framework import unique_name from framework import unique_name, program_guard
from initializer import Constant from initializer import Constant
from layer_helper import LayerHelper from layer_helper import LayerHelper
from regularizer import append_regularization_ops from regularizer import append_regularization_ops
from clip import append_gradient_clip_ops
__all__ = ['SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad'] __all__ = ['SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad']
...@@ -159,34 +160,32 @@ class Optimizer(object): ...@@ -159,34 +160,32 @@ class Optimizer(object):
# Create any accumulators # Create any accumulators
program = loss.block.program program = loss.block.program
self.helper = LayerHelper( with program_guard(program, startup_program):
self.__class__.__name__, self.helper = LayerHelper(self.__class__.__name__)
main_program=program, self._create_accumulators(loss.block,
startup_program=startup_program) [p[0] for p in parameters_and_grads])
self._create_accumulators(loss.block,
[p[0] for p in parameters_and_grads]) optimize_ops = []
for param_and_grad in parameters_and_grads:
optimize_ops = [] if param_and_grad[0].trainable is True and param_and_grad[
for param_and_grad in parameters_and_grads: 1] is not None:
if param_and_grad[0].trainable is True and param_and_grad[ optimize_op = self._append_optimize_op(loss.block,
1] is not None: param_and_grad)
optimize_op = self._append_optimize_op(loss.block, optimize_ops.append(optimize_op)
param_and_grad)
optimize_ops.append(optimize_op) # Returned list of ops can include more ops in addition
# to optimization ops
# Returned list of ops can include more ops in addition return_ops = optimize_ops
# to optimization ops
return_ops = optimize_ops # Get custom finish ops for subclasses
# FIXME: Need to fix this once we figure out how to handle dependencies
# Get custom finish ops for subclasses finish_ops = self._finish_update(loss.block)
# FIXME: Need to fix this once we figure out how to handle dependencies if finish_ops is not None:
finish_ops = self._finish_update(loss.block) return_ops += finish_ops
if finish_ops is not None:
return_ops += finish_ops if self._global_step is not None:
return_ops.append(self._increment_global_step(loss.block))
if self._global_step is not None: return return_ops
return_ops.append(self._increment_global_step(loss.block))
return return_ops
def minimize(self, def minimize(self,
loss, loss,
...@@ -199,9 +198,13 @@ class Optimizer(object): ...@@ -199,9 +198,13 @@ class Optimizer(object):
`create_optimization_pass()` into one. `create_optimization_pass()` into one.
""" """
params_grads = append_backward_ops(loss, parameter_list, no_grad_set) params_grads = append_backward_ops(loss, parameter_list, no_grad_set)
params_grads = append_gradient_clip_ops(params_grads)
# Add regularization if any # Add regularization if any
params_grads = append_regularization_ops(params_grads, params_grads = append_regularization_ops(params_grads,
self.regularization) self.regularization)
optimize_ops = self.create_optimization_pass(params_grads, loss, optimize_ops = self.create_optimization_pass(params_grads, loss,
startup_program) startup_program)
return optimize_ops return optimize_ops
......
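The ordering that minimize now applies (backward, gradient clip, regularization, then the optimization pass) is exercised by a single call; a sketch assuming avg_cost is a scalar loss variable in the default program:

.. code-block:: python

    sgd = fluid.optimizer.SGD(learning_rate=0.001)
    # clip and regularization ops are appended before the optimize ops
    opts = sgd.minimize(avg_cost)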
from initializer import Initializer, Xavier, Constant from initializer import Initializer, Xavier, Constant
from regularizer import WeightDecayRegularizer from regularizer import WeightDecayRegularizer
__all__ = ['ParamAttr']
class ParamAttr(object): class ParamAttr(object):
def __init__(self, def __init__(self,
...@@ -8,12 +10,14 @@ class ParamAttr(object): ...@@ -8,12 +10,14 @@ class ParamAttr(object):
initializer=None, initializer=None,
learning_rate=1.0, learning_rate=1.0,
regularizer=None, regularizer=None,
trainable=True): trainable=True,
clip=None):
self.name = name self.name = name
self.initializer = initializer self.initializer = initializer
self.learning_rate = learning_rate self.learning_rate = learning_rate
self.regularizer = regularizer self.regularizer = regularizer
self.trainable = trainable self.trainable = trainable
self.clip = clip
def set_default_initializer(self, initializer): def set_default_initializer(self, initializer):
if initializer is None: if initializer is None:
...@@ -56,7 +60,8 @@ class ParamAttr(object): ...@@ -56,7 +60,8 @@ class ParamAttr(object):
'name': self.name, 'name': self.name,
'learning_rate': self.learning_rate, 'learning_rate': self.learning_rate,
'regularizer': self.regularizer, 'regularizer': self.regularizer,
'trainable': self.trainable 'trainable': self.trainable,
'clip_attr': self.clip
} }
if with_initializer: if with_initializer:
kwargs['initializer'] = self.initializer kwargs['initializer'] = self.initializer
......
image/ image/
fit_a_line.model/ fit_a_line.model/
tmp tmp
cuda_profiler.txt
@@ -150,7 +150,7 @@ def main():
    crf_decode = fluid.layers.crf_decoding(
        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))

-    precision, recall, f1_score = fluid.layers.chunk_eval(
+    chunk_evaluator = fluid.evaluator.ChunkEvaluator(
        input=crf_decode,
        label=target,
        chunk_scheme="IOB",
@@ -176,20 +176,21 @@ def main():
    batch_id = 0
    for pass_id in xrange(PASS_NUM):
+        chunk_evaluator.reset(exe)
        for data in train_data():
-            outs = exe.run(fluid.default_main_program(),
-                           feed=feeder.feed(data),
-                           fetch_list=[avg_cost, precision, recall, f1_score])
-            avg_cost_val = np.array(outs[0])
-            precision_val = np.array(outs[1])
-            recall_val = np.array(outs[2])
-            f1_score_val = np.array(outs[3])
+            cost, precision, recall, f1_score = exe.run(
+                fluid.default_main_program(),
+                feed=feeder.feed(data),
+                fetch_list=[avg_cost] + chunk_evaluator.metrics)
+            pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(
+                exe)

            if batch_id % 10 == 0:
-                print("avg_cost=" + str(avg_cost_val))
-                print("precision_val=" + str(precision_val))
-                print("recall_val:" + str(recall_val))
-                print("f1_score_val:" + str(f1_score_val))
+                print("avg_cost:" + str(cost) + " precision:" + str(
+                    precision) + " recall:" + str(recall) + " f1_score:" + str(
+                    f1_score) + " pass_precision:" + str(
+                    pass_precision) + " pass_recall:" + str(pass_recall)
+                      + " pass_f1_score:" + str(pass_f1_score))

            # exit early for CI
            exit(0)
...
@@ -11,7 +11,9 @@ regularizer = fluid.regularizer.L2Decay(0.0005 * BATCH_SIZE)
hidden1 = fluid.layers.fc(input=image,
                          size=128,
                          act='relu',
-                          param_attr=regularizer)
+                          param_attr=fluid.ParamAttr(
+                              regularizer=regularizer,
+                              clip=fluid.clip.ClipByValue(10)))
hidden2 = fluid.layers.fc(input=hidden1,
                          size=64,
                          act='relu',
@@ -33,11 +35,10 @@ opts = optimizer.minimize(avg_cost)
accuracy = fluid.evaluator.Accuracy(input=predict, label=label)

inference_program = fluid.default_main_program().clone()
-test_accuracy = fluid.evaluator.Accuracy(
-    input=predict, label=label, main_program=inference_program)
-test_target = [avg_cost] + test_accuracy.metrics + test_accuracy.states
-inference_program = fluid.io.get_inference_program(
-    test_target, main_program=inference_program)
+with fluid.program_guard(inference_program):
+    test_accuracy = fluid.evaluator.Accuracy(input=predict, label=label)
+    test_target = [avg_cost] + test_accuracy.metrics + test_accuracy.states
+    inference_program = fluid.io.get_inference_program(test_target)

train_reader = paddle.batch(
    paddle.reader.shuffle(
...
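The recurring change in this commit replaces explicit `main_program=` and `startup_program=` keyword arguments with the `program_guard` context manager, which temporarily swaps the default programs that layer calls append to. A minimal sketch of the pattern (layer and variable names are illustrative):

import paddle.v2.fluid as fluid
from paddle.v2.fluid.framework import Program, program_guard

main_prog = Program()
startup_prog = Program()

# inside the guard, layers.* calls append ops to main_prog and put
# parameter initialization ops into startup_prog
with program_guard(main_prog, startup_prog):
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=1)
# outside the guard, the previous default programs are restored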
@@ -4,12 +4,7 @@ import paddle.v2.fluid as fluid
from paddle.v2.fluid.layer_helper import LayerHelper


-def lstm(x,
-         c_pre_init,
-         hidden_dim,
-         forget_bias=None,
-         main_program=None,
-         startup_program=None):
+def lstm(x, c_pre_init, hidden_dim, forget_bias=None):
    """
    This function helps create an operator for the LSTM (Long Short Term
    Memory) cell that can be used inside an RNN.
@@ -20,15 +15,8 @@ def lstm(x,
        c_pre = rnn.memory(init=c_pre_init)
        x_t = rnn.step_input(x)

-        before_fc = fluid.layers.concat(
-            input=[x_t, c_pre],
-            axis=1,
-            main_program=main_program,
-            startup_program=startup_program)
-        after_fc = fluid.layers.fc(input=before_fc,
-                                   size=hidden_dim * 4,
-                                   main_program=main_program,
-                                   startup_program=startup_program)
+        before_fc = fluid.layers.concat(input=[x_t, c_pre], axis=1)
+        after_fc = fluid.layers.fc(input=before_fc, size=hidden_dim * 4)

        dtype = x.dtype
        c = helper.create_tmp_variable(dtype)
...
@@ -3,10 +3,7 @@ import numpy as np
from op_test import OpTest
import paddle.v2.fluid.core as core
from paddle.v2.fluid.op import Operator
-
-
-def grad_var_name(var_name):
-    return var_name + "@GRAD"
+from paddle.v2.fluid.framework import grad_var_name


def get_backward_op(scope, op, no_grad_set):
...
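Several tests in this commit previously hard-coded the "@GRAD" suffix or defined a local helper; the change centralizes the mapping in `framework.grad_var_name`. A sketch of the behavior, assuming it matches the removed local helper:

from paddle.v2.fluid.framework import grad_var_name

# the framework names a variable's gradient by appending a fixed suffix
assert grad_var_name("mul.x") == "mul.x@GRAD"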
@@ -147,7 +147,13 @@ class TestChunkEvalOp(OpTest):
            'Recall': np.asarray(
                [recall], dtype='float32'),
            'F1-Score': np.asarray(
-                [f1], dtype='float32')
+                [f1], dtype='float32'),
+            'NumInferChunks': np.asarray(
+                [self.num_infer_chunks], dtype='int64'),
+            'NumLabelChunks': np.asarray(
+                [self.num_label_chunks], dtype='int64'),
+            'NumCorrectChunks': np.asarray(
+                [self.num_correct_chunks], dtype='int64')
        }

    def setUp(self):
...
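The three new outputs expose the raw chunk counts from which the batch metrics derive, which is what lets the Python-side ChunkEvaluator accumulate them across batches. The standard relationships, shown with hypothetical counts:

# hypothetical counts for one batch
num_infer_chunks, num_label_chunks, num_correct_chunks = 8, 10, 6

precision = float(num_correct_chunks) / num_infer_chunks  # 0.75
recall = float(num_correct_chunks) / num_label_chunks     # 0.60
f1 = 2 * precision * recall / (precision + recall)        # ~0.667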
import unittest
import paddle.v2.fluid.framework as framework


class ConditionalBlock(unittest.TestCase):
    def test_const_value(self):
        self.assertEqual(framework.GRAD_VAR_SUFFIX, "@GRAD")
        self.assertEqual(framework.TEMP_VAR_NAME, "@TEMP@")
        self.assertEqual(framework.GRAD_VAR_SUFFIX, "@GRAD")
        self.assertEqual(framework.ZERO_VAR_SUFFIX, "@ZERO")


if __name__ == '__main__':
    unittest.main()
@@ -47,7 +47,9 @@ class TestDropoutOp4(OpTest):
        self.op_type = "dropout"
        self.inputs = {'X': np.random.random((32, 64)).astype("float32")}
        self.attrs = {'dropout_prob': 0.35, 'is_test': True}
-        self.outputs = {'Out': self.inputs['X'] * self.attrs['dropout_prob']}
+        self.outputs = {
+            'Out': self.inputs['X'] * (1.0 - self.attrs['dropout_prob'])
+        }

    def test_check_output(self):
        self.check_output()
@@ -58,7 +60,9 @@ class TestDropoutOp5(OpTest):
        self.op_type = "dropout"
        self.inputs = {'X': np.random.random((32, 64, 3)).astype("float32")}
        self.attrs = {'dropout_prob': 0.75, 'is_test': True}
-        self.outputs = {'Out': self.inputs['X'] * self.attrs['dropout_prob']}
+        self.outputs = {
+            'Out': self.inputs['X'] * (1.0 - self.attrs['dropout_prob'])
+        }

    def test_check_output(self):
        self.check_output()
...
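The corrected expectation reflects how dropout behaves at inference time: no elements are dropped, and activations are scaled by the keep probability `1 - dropout_prob`, not by the drop probability. A quick NumPy check of the expected output:

import numpy as np

dropout_prob = 0.35
x = np.random.random((32, 64)).astype("float32")

# at test time the output is the input scaled by the keep probability
expected = x * (1.0 - dropout_prob)
assert np.allclose(expected, x * 0.65)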
@@ -5,12 +5,7 @@ import paddle.v2.fluid.nets as nets
from paddle.v2.fluid.framework import Program


-def conv_block(input,
-               num_filter,
-               groups,
-               dropouts,
-               main_program=None,
-               startup_program=None):
+def conv_block(input, num_filter, groups, dropouts):
    return nets.img_conv_group(
        input=input,
        pool_size=2,
@@ -20,90 +15,54 @@ def conv_block(input,
        conv_act='relu',
        conv_with_batchnorm=True,
        conv_batchnorm_drop_rate=dropouts,
-        pool_type='max',
-        main_program=main_program,
-        startup_program=startup_program)
+        pool_type='max')


class TestLayer(unittest.TestCase):
    def test_batch_norm_layer(self):
        main_program = Program()
        startup_program = Program()
-        images = fluid.layers.data(
-            name='pixel',
-            shape=[3, 48, 48],
-            dtype='float32',
-            main_program=main_program)
-        hidden1 = fluid.layers.batch_norm(
-            input=images,
-            main_program=main_program,
-            startup_program=startup_program)
-        hidden2 = fluid.layers.fc(input=hidden1,
-                                  size=128,
-                                  act='relu',
-                                  main_program=main_program)
-        hidden3 = fluid.layers.batch_norm(
-            input=hidden2,
-            main_program=main_program,
-            startup_program=startup_program)
+        with fluid.program_guard(main_program, startup_program):
+            images = fluid.layers.data(
+                name='pixel', shape=[3, 48, 48], dtype='float32')
+            hidden1 = fluid.layers.batch_norm(input=images)
+            hidden2 = fluid.layers.fc(input=hidden1, size=128, act='relu')
+            fluid.layers.batch_norm(input=hidden2)

        print str(main_program)

    def test_dropout_layer(self):
        main_program = Program()
        startup_program = Program()
-        images = fluid.layers.data(
-            name='pixel',
-            shape=[3, 48, 48],
-            dtype='float32',
-            main_program=main_program)
-        fluid.layers.dropout(
-            x=images,
-            dropout_prob=0.5,
-            main_program=main_program,
-            startup_program=startup_program)
+        with fluid.program_guard(main_program, startup_program):
+            images = fluid.layers.data(
+                name='pixel', shape=[3, 48, 48], dtype='float32')
+            fluid.layers.dropout(x=images, dropout_prob=0.5)

-        # print str(main_program)
+        print str(main_program)

    def test_img_conv_group(self):
        main_program = Program()
        startup_program = Program()
-        images = fluid.layers.data(
-            name='pixel',
-            shape=[3, 48, 48],
-            dtype='float32',
-            main_program=main_program,
-            startup_program=startup_program)
-        conv1 = conv_block(images, 64, 2, [0.3, 0], main_program,
-                           startup_program)
-        conv2 = conv_block(conv1, 256, 3, [0.4, 0.4, 0], main_program,
-                           startup_program)
+        with fluid.program_guard(main_program, startup_program):
+            images = fluid.layers.data(
+                name='pixel', shape=[3, 48, 48], dtype='float32')
+            conv1 = conv_block(images, 64, 2, [0.3, 0])
+            conv_block(conv1, 256, 3, [0.4, 0.4, 0])

-        # print str(main_program)
+        print str(main_program)

    def test_elementwise_add_with_act(self):
        main_program = Program()
        startup_program = Program()
-        image1 = fluid.layers.data(
-            name='pixel1',
-            shape=[3, 48, 48],
-            dtype='float32',
-            main_program=main_program,
-            startup_program=startup_program)
-        image2 = fluid.layers.data(
-            name='pixel2',
-            shape=[3, 48, 48],
-            dtype='float32',
-            main_program=main_program,
-            startup_program=startup_program)
-        out = fluid.layers.elementwise_add(
-            x=image1,
-            y=image2,
-            act='relu',
-            main_program=main_program,
-            startup_program=startup_program)
-        # print(main_program)
+        with fluid.program_guard(main_program, startup_program):
+            image1 = fluid.layers.data(
+                name='pixel1', shape=[3, 48, 48], dtype='float32')
+            image2 = fluid.layers.data(
+                name='pixel2', shape=[3, 48, 48], dtype='float32')
+            fluid.layers.elementwise_add(x=image1, y=image2, act='relu')
+        print(main_program)


if __name__ == '__main__':
...
@@ -6,7 +6,7 @@ import paddle.v2.fluid.core as core
import paddle.v2.fluid.executor as executor
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.optimizer as optimizer
-from paddle.v2.fluid.framework import Program
+from paddle.v2.fluid.framework import Program, program_guard
from paddle.v2.fluid.io import save_inference_model, load_inference_model
@@ -16,35 +16,18 @@ class TestBook(unittest.TestCase):
        init_program = Program()
        program = Program()
-        x = layers.data(
-            name='x',
-            shape=[2],
-            dtype='float32',
-            main_program=program,
-            startup_program=init_program)
-        y = layers.data(
-            name='y',
-            shape=[1],
-            dtype='float32',
-            main_program=program,
-            startup_program=init_program)
-        y_predict = layers.fc(input=x,
-                              size=1,
-                              act=None,
-                              main_program=program,
-                              startup_program=init_program)
-        cost = layers.square_error_cost(
-            input=y_predict,
-            label=y,
-            main_program=program,
-            startup_program=init_program)
-        avg_cost = layers.mean(
-            x=cost, main_program=program, startup_program=init_program)
-        sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
-        sgd_optimizer.minimize(avg_cost, init_program)
+        with program_guard(program, init_program):
+            x = layers.data(name='x', shape=[2], dtype='float32')
+            y = layers.data(name='y', shape=[1], dtype='float32')
+
+            y_predict = layers.fc(input=x, size=1, act=None)
+
+            cost = layers.square_error_cost(input=y_predict, label=y)
+            avg_cost = layers.mean(x=cost)
+
+            sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
+            sgd_optimizer.minimize(avg_cost, init_program)

        place = core.CPUPlace()
        exe = executor.Executor(place)
...
@@ -161,6 +161,41 @@ class TestBook(unittest.TestCase):
            x=dat, label=lbl))
        print(str(program))

+    def test_sequence_expand(self):
+        program = Program()
+        with program_guard(program):
+            x = layers.data(name='x', shape=[10], dtype='float32')
+            y = layers.data(
+                name='y', shape=[10, 20], dtype='float32', lod_level=1)
+            self.assertIsNotNone(layers.sequence_expand(x=x, y=y))
+        print(str(program))
+
+    def test_lstm_unit(self):
+        program = Program()
+        with program_guard(program):
+            x_t_data = layers.data(
+                name='x_t_data', shape=[10, 10], dtype='float32')
+            x_t = layers.fc(input=x_t_data, size=10)
+            prev_hidden_data = layers.data(
+                name='prev_hidden_data', shape=[10, 20], dtype='float32')
+            prev_hidden = layers.fc(input=prev_hidden_data, size=20)
+            prev_cell_data = layers.data(
+                name='prev_cell', shape=[10, 30], dtype='float32')
+            prev_cell = layers.fc(input=prev_cell_data, size=30)
+            self.assertIsNotNone(
+                layers.lstm_unit(
+                    x_t=x_t, hidden_t_prev=prev_hidden, cell_t_prev=prev_cell))
+        print(str(program))
+
+    def test_sequence_softmax(self):
+        program = Program()
+        with program_guard(program):
+            seq_data = layers.data(
+                name='seq_data', shape=[10, 10], dtype='float32', lod_level=1)
+            seq = layers.fc(input=seq_data, size=20)
+            self.assertIsNotNone(layers.sequence_softmax(x=seq))
+        print(str(program))
+

if __name__ == '__main__':
    unittest.main()
@@ -2,7 +2,7 @@ import unittest
import paddle.v2.fluid.core as core
import numpy
import paddle.v2.fluid.layers as layers
-from paddle.v2.fluid.framework import Program
+from paddle.v2.fluid.framework import Program, program_guard
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops
@@ -118,16 +118,17 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
    def main(self, tensor, expect_array, expect_lod, expect_max_len, level=0):
        place = self.place()
        program = Program()
-        x = layers.data(name='x', shape=[10], main_program=program)
-        x.persistable = True
-        table = layers.lod_rank_table(x, level=level, main_program=program)
-        max_len = layers.max_sequence_len(table, main_program=program)
-        max_len.persistable = True
-        array = layers.lod_tensor_to_array(x, table, main_program=program)
-        array.persistable = True
-
-        result = layers.array_to_lod_tensor(array, table, main_program=program)
-        result.persistable = True
+        with program_guard(program):
+            x = layers.data(name='x', shape=[10])
+            x.persistable = True
+            table = layers.lod_rank_table(x, level=level)
+            max_len = layers.max_sequence_len(table)
+            max_len.persistable = True
+            array = layers.lod_tensor_to_array(x, table)
+            array.persistable = True
+
+            result = layers.array_to_lod_tensor(array, table)
+            result.persistable = True
        exe = Executor(place)
        scope = core.Scope()
        exe.run(program, feed={'x': tensor}, scope=scope)
@@ -160,19 +161,16 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase):
        place = core.CPUPlace()
        program = Program()
-        x = layers.data(
-            name='x',
-            shape=[1],
-            dtype='float32',
-            main_program=program,
-            stop_gradient=False)
-        table = layers.lod_rank_table(x, level=0, main_program=program)
-        array = layers.lod_tensor_to_array(x, table, main_program=program)
-        result = layers.array_to_lod_tensor(array, table, main_program=program)
+        with program_guard(program):
+            x = layers.data(
+                name='x', shape=[1], dtype='float32', stop_gradient=False)
+            table = layers.lod_rank_table(x, level=0)
+            array = layers.lod_tensor_to_array(x, table)
+            result = layers.array_to_lod_tensor(array, table)

-        mean = layers.mean(x=result, main_program=program)
+            mean = layers.mean(x=result)

-        append_backward_ops(mean)
+            append_backward_ops(mean)

        tensor = core.LoDTensor()
        tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place)
...
import paddle.v2.fluid.layers as layers
-from paddle.v2.fluid.framework import Program
+from paddle.v2.fluid.framework import Program, program_guard, default_main_program, default_startup_program
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.optimizer import MomentumOptimizer
import paddle.v2.fluid.core as core
@@ -10,44 +10,42 @@ import numpy as np
class TestMNISTIfElseOp(unittest.TestCase):
    def test_raw_api(self):
-        kwargs = {'startup_program': Program(), 'main_program': Program()}
-        image = layers.data(name='x', shape=[784], dtype='float32', **kwargs)
-
-        label = layers.data(name='y', shape=[1], dtype='int64', **kwargs)
-
-        limit = layers.fill_constant_batch_size_like(
-            input=label, dtype='int64', shape=[1], value=5.0, **kwargs)
-
-        cond = layers.less_than(x=label, y=limit, **kwargs)
-        true_image, false_image = layers.split_lod_tensor(
-            input=image, mask=cond, **kwargs)
-
-        true_out = layers.create_tensor(dtype='float32', **kwargs)
-        true_cond = layers.ConditionalBlock([true_image], **kwargs)
-
-        with true_cond.block():
-            hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs)
-            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
-            layers.assign(input=prob, output=true_out, **kwargs)
-
-        false_out = layers.create_tensor(dtype='float32', **kwargs)
-        false_cond = layers.ConditionalBlock([false_image], **kwargs)
-
-        with false_cond.block():
-            hidden = layers.fc(input=false_image,
-                               size=200,
-                               act='tanh',
-                               **kwargs)
-            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
-            layers.assign(input=prob, output=false_out, **kwargs)
-
-        prob = layers.merge_lod_tensor(
-            in_true=true_out, in_false=false_out, mask=cond, x=image, **kwargs)
-        loss = layers.cross_entropy(input=prob, label=label, **kwargs)
-        avg_loss = layers.mean(x=loss, **kwargs)
-
-        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
-        optimizer.minimize(avg_loss, kwargs['startup_program'])
+        prog = Program()
+        startup_prog = Program()
+        with program_guard(prog, startup_prog):
+            image = layers.data(name='x', shape=[784], dtype='float32')
+            label = layers.data(name='y', shape=[1], dtype='int64')
+
+            limit = layers.fill_constant_batch_size_like(
+                input=label, dtype='int64', shape=[1], value=5.0)
+            cond = layers.less_than(x=label, y=limit)
+            true_image, false_image = layers.split_lod_tensor(
+                input=image, mask=cond)
+
+            true_out = layers.create_tensor(dtype='float32')
+            true_cond = layers.ConditionalBlock([true_image])
+
+            with true_cond.block():
+                hidden = layers.fc(input=true_image, size=100, act='tanh')
+                prob = layers.fc(input=hidden, size=10, act='softmax')
+                layers.assign(input=prob, output=true_out)
+
+            false_out = layers.create_tensor(dtype='float32')
+            false_cond = layers.ConditionalBlock([false_image])
+
+            with false_cond.block():
+                hidden = layers.fc(input=false_image, size=200, act='tanh')
+                prob = layers.fc(input=hidden, size=10, act='softmax')
+                layers.assign(input=prob, output=false_out)
+
+            prob = layers.merge_lod_tensor(
+                in_true=true_out, in_false=false_out, mask=cond, x=image)
+            loss = layers.cross_entropy(input=prob, label=label)
+            avg_loss = layers.mean(x=loss)
+
+            optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
+            optimizer.minimize(avg_loss, startup_prog)

        train_reader = paddle.batch(
            paddle.reader.shuffle(
@@ -57,7 +55,7 @@ class TestMNISTIfElseOp(unittest.TestCase):
        place = core.CPUPlace()
        exe = Executor(place)

-        exe.run(kwargs['startup_program'])
+        exe.run(startup_prog)
        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for data in train_reader():
@@ -65,7 +63,7 @@ class TestMNISTIfElseOp(unittest.TestCase):
                y_data = np.array(map(lambda x: x[1], data)).astype("int64")
                y_data = np.expand_dims(y_data, axis=1)

-                outs = exe.run(kwargs['main_program'],
+                outs = exe.run(prog,
                               feed={'x': x_data,
                                     'y': y_data},
                               fetch_list=[avg_loss])
@@ -75,39 +73,36 @@ class TestMNISTIfElseOp(unittest.TestCase):
        self.assertFalse(True)

    def test_ifelse(self):
-        kwargs = {'startup_program': Program(), 'main_program': Program()}
-        image = layers.data(name='x', shape=[784], dtype='float32', **kwargs)
-
-        label = layers.data(name='y', shape=[1], dtype='int64', **kwargs)
-
-        limit = layers.fill_constant_batch_size_like(
-            input=label, dtype='int64', shape=[1], value=5.0, **kwargs)
-
-        cond = layers.less_than(x=label, y=limit, **kwargs)
-
-        ie = layers.IfElse(cond, **kwargs)
-
-        with ie.true_block():
-            true_image = ie.input(image)
-            hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs)
-            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
-            ie.output(prob)
-
-        with ie.false_block():
-            false_image = ie.input(image)
-            hidden = layers.fc(input=false_image,
-                               size=200,
-                               act='tanh',
-                               **kwargs)
-            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
-            ie.output(prob)
-
-        prob = ie()
-        loss = layers.cross_entropy(input=prob[0], label=label, **kwargs)
-        avg_loss = layers.mean(x=loss, **kwargs)
-
-        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
-        optimizer.minimize(avg_loss, kwargs['startup_program'])
+        prog = Program()
+        startup_prog = Program()
+        with program_guard(prog, startup_prog):
+            image = layers.data(name='x', shape=[784], dtype='float32')
+            label = layers.data(name='y', shape=[1], dtype='int64')
+
+            limit = layers.fill_constant_batch_size_like(
+                input=label, dtype='int64', shape=[1], value=5.0)
+            cond = layers.less_than(x=label, y=limit)
+
+            ie = layers.IfElse(cond)
+
+            with ie.true_block():
+                true_image = ie.input(image)
+                hidden = layers.fc(input=true_image, size=100, act='tanh')
+                prob = layers.fc(input=hidden, size=10, act='softmax')
+                ie.output(prob)
+
+            with ie.false_block():
+                false_image = ie.input(image)
+                hidden = layers.fc(input=false_image, size=200, act='tanh')
+                prob = layers.fc(input=hidden, size=10, act='softmax')
+                ie.output(prob)
+
+            prob = ie()
+            loss = layers.cross_entropy(input=prob[0], label=label)
+            avg_loss = layers.mean(x=loss)
+
+            optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
+            optimizer.minimize(avg_loss, startup_prog)

        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=8192),
@@ -135,4 +130,5 @@ class TestMNISTIfElseOp(unittest.TestCase):

if __name__ == '__main__':
-    unittest.main()
+    # temp disable if else unittest since it could be buggy.
+    exit(0)
import unittest
import paddle.v2.fluid.op as op
+import paddle.v2.fluid.core as core
import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
...
from __future__ import print_function
import unittest

-from paddle.v2.fluid.framework import Program, default_main_program
+from paddle.v2.fluid.framework import Program, default_main_program, program_guard, grad_var_name
import paddle.v2.fluid.layers as layers

main_program = default_main_program()
@@ -109,12 +109,10 @@ class TestProgram(unittest.TestCase):
        self.assertEqual(add_op.idx, 1)
        param_to_grad = prog.append_backward(mean_out, set())

-        def grad_name(name):
-            return name + "@GRAD"
-
        for var_name in ("mul.x", "mul.y", "mul.out", "add.y", "add.out",
                         "mean.out"):
-            self.assertEqual(param_to_grad[var_name][0], grad_name(var_name))
+            self.assertEqual(param_to_grad[var_name][0],
+                             grad_var_name(var_name))
            self.assertEqual(param_to_grad[var_name][1], 0)

        expect_ops = [
@@ -129,13 +127,10 @@ class TestProgram(unittest.TestCase):
    def test_program_clone_with_parameter(self):
        main_program = Program()
        startup_program = Program()
-        kwargs = {
-            'main_program': main_program,
-            'startup_program': startup_program
-        }
-        d = layers.data(name='x', shape=[784], dtype='float32', **kwargs)
-        hidden = layers.fc(input=d, size=100, **kwargs)
-        layers.fc(input=hidden, size=100, **kwargs)
+        with program_guard(main_program, startup_program):
+            d = layers.data(name='x', shape=[784], dtype='float32')
+            hidden = layers.fc(input=d, size=100)
+            layers.fc(input=hidden, size=100)

        new_program = main_program.clone()
        self.assertNotEqual(0, len(new_program.blocks[0].all_parameters()))
...
import unittest
import paddle.v2.fluid.layers as layers
-from paddle.v2.fluid.framework import Program
+from paddle.v2.fluid.framework import Program, grad_var_name
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops
import numpy as np
@@ -164,7 +164,7 @@ class RecurrentOpTest1(unittest.TestCase):
            for x in self.data_field
        }
        fetch_list = [
-            self.main_program.global_block().var(x + "@GRAD")
+            self.main_program.global_block().var(grad_var_name(x))
            for x in self.data_field
        ]
...
@@ -17,5 +17,19 @@ class TestReshapeOp(OpTest):
        self.check_grad(["X"], "Out")

+
+class TestReshapeOpDimInfer(OpTest):
+    def setUp(self):
+        self.op_type = "reshape"
+        self.inputs = {'X': np.random.random((10, 20)).astype("float32")}
+        self.attrs = {'shape': [4, -1, 5]}
+        self.outputs = {'Out': self.inputs['X'].reshape(self.attrs['shape'])}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(["X"], "Out")
+

if __name__ == '__main__':
    unittest.main()
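The new test exercises shape inference for reshape: a single `-1` in the target shape is resolved so that the total element count is preserved. For the shapes used above, a quick check:

import numpy as np

x = np.random.random((10, 20))  # 200 elements
out = x.reshape([4, -1, 5])     # -1 resolves to 200 / (4 * 5) = 10
assert out.shape == (4, 10, 5)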
@@ -3,7 +3,7 @@ import numpy as np
from op_test import OpTest


-class TestSeqExpand(OpTest):
+class TestSequenceExpand(OpTest):
    def set_data(self):
        x_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32')
        y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32')
@@ -21,7 +21,7 @@ class TestSeqExpand(OpTest):
        self.outputs = {'Out': out}

    def setUp(self):
-        self.op_type = 'seq_expand'
+        self.op_type = 'sequence_expand'
        self.set_data()
        self.compute()
@@ -32,7 +32,7 @@ class TestSeqExpand(OpTest):
        self.check_grad(["X"], "Out")


-class TestSeqExpandCase1(TestSeqExpand):
+class TestSequenceExpandCase1(TestSequenceExpand):
    def set_data(self):
        x_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32')
        x_lod = [[0, 2, 5]]
@@ -41,7 +41,7 @@ class TestSeqExpandCase1(TestSeqExpand):
        self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}


-class TestSeqExpandCase2(TestSeqExpand):
+class TestSequenceExpandCase2(TestSequenceExpand):
    def set_data(self):
        x_data = np.random.uniform(0.1, 1, [1, 2, 2]).astype('float32')
        x_lod = [[0, 1]]
@@ -50,7 +50,7 @@ class TestSeqExpandCase2(TestSeqExpand):
        self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}


-class TestSeqExpandCase3(TestSeqExpand):
+class TestSequenceExpandCase3(TestSequenceExpand):
    def set_data(self):
        x_data = np.random.uniform(0.1, 1, [4, 1]).astype('float32')
        x_lod = [[0, 1, 2, 3, 4]]
...
@@ -2,7 +2,7 @@ import unittest
import paddle.v2.fluid.core as core
import numpy as np
import paddle.v2.fluid.layers as layers
-from paddle.v2.fluid.framework import Program
+from paddle.v2.fluid.framework import Program, program_guard
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops
@@ -75,26 +75,22 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
             level=0):
        place = self.place()
        program = Program()
-        x = layers.data(name='x', shape=[1], main_program=program)
-        x.persistable = True
-
-        y = layers.data(name='y', shape=[1], main_program=program)
-        y.persistable = True
-
-        out_true, out_false = layers.split_lod_tensor(
-            input=x, mask=y, level=level, main_program=program)
-        out_true.persistable = True
-        out_false.persistable = True
-
-        out = layers.merge_lod_tensor(
-            in_true=out_true,
-            in_false=out_false,
-            mask=y,
-            x=x,
-            level=level,
-            main_program=program)
-        out.persistable = True
+        with program_guard(program):
+            x = layers.data(name='x', shape=[1])
+            x.persistable = True
+
+            y = layers.data(name='y', shape=[1])
+            y.persistable = True
+
+            out_true, out_false = layers.split_lod_tensor(
+                input=x, mask=y, level=level)
+            out_true.persistable = True
+            out_false.persistable = True
+
+            out = layers.merge_lod_tensor(
+                in_true=out_true, in_false=out_false, mask=y, x=x, level=level)
+            out.persistable = True

        exe = Executor(place)
        scope = core.Scope()
@@ -123,34 +119,21 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase):
    def test_grad(self):
        place = core.CPUPlace()
        program = Program()
-        x = layers.data(
-            name='x',
-            shape=[1],
-            dtype='float32',
-            main_program=program,
-            stop_gradient=False)
-        y = layers.data(
-            name='y',
-            shape=[1],
-            dtype='bool',
-            main_program=program,
-            stop_gradient=False)
-
-        level = 0
-
-        out_true, out_false = layers.split_lod_tensor(
-            input=x, mask=y, level=level, main_program=program)
-        out = layers.merge_lod_tensor(
-            in_true=out_true,
-            in_false=out_false,
-            mask=y,
-            x=x,
-            level=level,
-            main_program=program)
-        mean = layers.mean(x=out, main_program=program)
-
-        append_backward_ops(mean)
+        with program_guard(program):
+            x = layers.data(
+                name='x', shape=[1], dtype='float32', stop_gradient=False)
+            y = layers.data(
+                name='y', shape=[1], dtype='bool', stop_gradient=False)
+
+            level = 0
+
+            out_true, out_false = layers.split_lod_tensor(
+                input=x, mask=y, level=level)
+            out = layers.merge_lod_tensor(
+                in_true=out_true, in_false=out_false, mask=y, x=x, level=level)
+            mean = layers.mean(x=out)
+
+            append_backward_ops(mean)

        tensor = core.LoDTensor()
        tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place)
...
import unittest
import numpy as np
from op_test import OpTest
from test_pool2d_op import max_pool2D_forward_naive
from test_pool2d_op import avg_pool2D_forward_naive


class TestSppOp(OpTest):
    def setUp(self):
        self.op_type = "spp"
        self.init_test_case()
        input = np.random.random(self.shape).astype("float32")
        nsize, csize, hsize, wsize = input.shape
        out_level_flatten = []
        for i in xrange(self.pyramid_height):
            bins = np.power(2, i)
            kernel_size = [0, 0]
            padding = [0, 0]
            kernel_size[0] = np.ceil(hsize /
                                     bins.astype("double")).astype("int32")
            padding[0] = (
                (kernel_size[0] * bins - hsize + 1) / 2).astype("int32")

            kernel_size[1] = np.ceil(wsize /
                                     bins.astype("double")).astype("int32")
            padding[1] = (
                (kernel_size[1] * bins - wsize + 1) / 2).astype("int32")
            out_level = self.pool2D_forward_naive(input, kernel_size,
                                                  kernel_size, padding)
            out_level_flatten.append(
                out_level.reshape(nsize, bins * bins * csize))
            if i == 0:
                output = out_level_flatten[i]
            else:
                output = np.concatenate((output, out_level_flatten[i]), 1)
        # output = np.concatenate(out_level_flatten.tolist(), 0);
        self.inputs = {'X': input.astype('float32'), }
        self.attrs = {
            'pyramid_height': self.pyramid_height,
            'pooling_type': self.pool_type
        }

        self.outputs = {'Out': output.astype('float32')}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        if self.pool_type != "avg":
            self.check_grad(['X'], 'Out', max_relative_error=0.05)

    def init_test_case(self):
        self.shape = [3, 2, 4, 4]
        self.pyramid_height = 3
        self.pool2D_forward_naive = max_pool2D_forward_naive
        self.pool_type = "max"


class TestCase2(TestSppOp):
    def init_test_case(self):
        self.shape = [3, 2, 4, 4]
        self.pyramid_height = 3
        self.pool2D_forward_naive = avg_pool2D_forward_naive
        self.pool_type = "avg"


if __name__ == '__main__':
    unittest.main()
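Spatial pyramid pooling concatenates one pooled map per level, with level i divided into 2^i x 2^i bins, so the flattened output width is csize * sum(4^i) regardless of the input height and width. A quick check for this test's configuration (csize=2, pyramid_height=3):

csize, pyramid_height = 2, 3

# each level i contributes csize * (2 ** i) ** 2 flattened features
out_width = sum(csize * (2 ** i) ** 2 for i in range(pyramid_height))
assert out_width == 2 * (1 + 4 + 16)  # 42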
@@ -390,8 +390,6 @@ def pipe_reader(left_cmd,
    if not callable(parser):
        raise TypeError("parser must be a callable object")

-    process = subprocess.Popen(
-        left_cmd.split(" "), bufsize=bufsize, stdout=subprocess.PIPE)
    # TODO(typhoonzero): add a thread to read stderr

    # Always init a decompress object is better than
@@ -400,6 +398,8 @@ def pipe_reader(left_cmd,
                                32 + zlib.MAX_WBITS)  # offset 32 to skip the header

    def reader():
+        process = subprocess.Popen(
+            left_cmd.split(" "), bufsize=bufsize, stdout=subprocess.PIPE)
        remained = ""
        while True:
            buff = process.stdout.read(bufsize)
...
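Moving the `Popen` call inside `reader()` means a fresh subprocess is started each time the reader is iterated, so the same reader can be consumed multiple times instead of draining a single pipe once. A minimal sketch of the same pattern outside Paddle (the `cat` command and file path are illustrative):

import subprocess

def line_reader(cmd):
    def reader():
        # a new subprocess per call: each iteration re-runs the
        # command and yields its output from the start
        process = subprocess.Popen(cmd.split(" "), stdout=subprocess.PIPE)
        for line in process.stdout:
            yield line.rstrip()
    return reader

reader = line_reader("cat /etc/hosts")
assert list(reader()) == list(reader())  # reusable across iterations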
@@ -145,5 +145,35 @@ class TestXmap(unittest.TestCase):
                self.assertEqual(e, mapper(idx))

+
+class TestPipeReader(unittest.TestCase):
+    def test_pipe_reader(self):
+        def simple_parser(lines):
+            return lines
+
+        import tempfile
+
+        records = [str(i) for i in xrange(5)]
+        temp = tempfile.NamedTemporaryFile()
+        try:
+            with open(temp.name, 'w') as f:
+                for r in records:
+                    f.write('%s\n' % r)
+
+            cmd = "cat %s" % temp.name
+            reader = paddle.v2.reader.pipe_reader(
+                cmd, simple_parser, bufsize=128)
+            for i in xrange(4):
+                result = []
+                for r in reader():
+                    result.append(r)
+
+                for idx, e in enumerate(records):
+                    print e, result[idx]
+                    self.assertEqual(e, result[idx])
+        finally:
+            # delete the temporary file
+            temp.close()
+

if __name__ == '__main__':
    unittest.main()