提交 aa1ac36c 编写于 作者: Q qijun

merge baidu/develop

......@@ -6,8 +6,18 @@ height = 227
width = 227
num_class = 1000
batch_size = get_config_arg('batch_size', int, 128)
gp = get_config_arg('layer_num', int, 1)
is_infer = get_config_arg("is_infer", bool, False)
num_samples = get_config_arg('num_samples', int, 2560)
args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
args = {
'height': height,
'width': width,
'color': True,
'num_class': num_class,
'is_infer': is_infer,
'num_samples': num_samples
}
define_py_data_sources2(
"train.list", None, module="provider", obj="process", args=args)
......@@ -31,7 +41,7 @@ net = img_pool_layer(input=net, pool_size=3, stride=2)
# conv2
net = img_conv_layer(
input=net, filter_size=5, num_filters=256, stride=1, padding=2, groups=1)
input=net, filter_size=5, num_filters=256, stride=1, padding=2, groups=gp)
net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75)
net = img_pool_layer(input=net, pool_size=3, stride=2)
......@@ -40,11 +50,11 @@ net = img_conv_layer(
input=net, filter_size=3, num_filters=384, stride=1, padding=1)
# conv4
net = img_conv_layer(
input=net, filter_size=3, num_filters=384, stride=1, padding=1, groups=1)
input=net, filter_size=3, num_filters=384, stride=1, padding=1, groups=gp)
# conv5
net = img_conv_layer(
input=net, filter_size=3, num_filters=256, stride=1, padding=1, groups=1)
input=net, filter_size=3, num_filters=256, stride=1, padding=1, groups=gp)
net = img_pool_layer(input=net, pool_size=3, stride=2)
net = fc_layer(
......@@ -59,6 +69,9 @@ net = fc_layer(
layer_attr=ExtraAttr(drop_rate=0.5))
net = fc_layer(input=net, size=1000, act=SoftmaxActivation())
lab = data_layer('label', num_class)
loss = cross_entropy(input=net, label=lab)
outputs(loss)
if is_infer:
outputs(net)
else:
lab = data_layer('label', num_class)
loss = cross_entropy(input=net, label=lab)
outputs(loss)
......@@ -7,13 +7,15 @@ num_class = 1000
batch_size = get_config_arg('batch_size', int, 128)
use_gpu = get_config_arg('use_gpu', bool, True)
is_infer = get_config_arg("is_infer", bool, False)
num_samples = get_config_arg('num_samples', int, 2560)
args = {
'height': height,
'width': width,
'color': True,
'num_class': num_class,
'is_infer': is_infer
'is_infer': is_infer,
'num_samples': num_samples
}
define_py_data_sources2(
"train.list" if not is_infer else None,
......
......@@ -14,6 +14,7 @@ def initHook(settings, height, width, color, num_class, **kwargs):
else:
settings.data_size = settings.height * settings.width
settings.is_infer = kwargs.get('is_infer', False)
settings.num_samples = kwargs.get('num_samples', 2560)
if settings.is_infer:
settings.slots = [dense_vector(settings.data_size)]
else:
......@@ -23,7 +24,7 @@ def initHook(settings, height, width, color, num_class, **kwargs):
@provider(
init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_list):
for i in xrange(2560 if settings.is_infer else 1024):
for i in xrange(settings.num_samples):
img = np.random.rand(1, settings.data_size).reshape(-1, 1).flatten()
if settings.is_infer:
yield img.astype('float32')
......
......@@ -7,13 +7,15 @@ num_class = 1000
batch_size = get_config_arg('batch_size', int, 64)
layer_num = get_config_arg("layer_num", int, 50)
is_infer = get_config_arg("is_infer", bool, False)
num_samples = get_config_arg('num_samples', int, 2560)
args = {
'height': height,
'width': width,
'color': True,
'num_class': num_class,
'is_infer': is_infer
'is_infer': is_infer,
'num_samples': num_samples
}
define_py_data_sources2(
"train.list" if not is_infer else None,
......
......@@ -37,7 +37,7 @@ function infer() {
--trainer_count=1 \
--num_passes=1 \
--save_dir="models/${topology}-${layer_num}" \
--config_args="batch_size=128,layer_num=${layer_num}" \
--config_args="batch_size=128,layer_num=${layer_num},num_samples=256" \
> /dev/null 2>&1
echo "Done"
fi
......@@ -79,8 +79,9 @@ fi
# inference benchmark
for use_mkldnn in True False; do
for batchsize in 1 2 4 8 16; do
infer googlenet v1 $batchsize $use_mkldnn
infer resnet 50 $batchsize $use_mkldnn
infer vgg 19 $batchsize $use_mkldnn
infer resnet 50 $batchsize $use_mkldnn
infer googlenet v1 $batchsize $use_mkldnn
infer alexnet 2 $batchsize $use_mkldnn
done
done
......@@ -47,5 +47,6 @@ for use_mkldnn in True False; do
train vgg 19 $batchsize $use_mkldnn
train resnet 50 $batchsize $use_mkldnn
train googlenet v1 $batchsize $use_mkldnn
train alexnet 2 $batchsize $use_mkldnn
done
done
......@@ -23,24 +23,25 @@ function infer() {
echo "./run_mkl_infer.sh to save the model first"
exit 0
fi
log_period=$((256 / bs))
log_period=$((32 / bs))
paddle train --job=test \
--config="${topology}.py" \
--use_mkldnn=False \
--use_gpu=False \
--trainer_count=$thread \
--log_period=$log_period \
--config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True" \
--config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True,num_samples=256" \
--init_model_path=$models_in \
2>&1 | tee ${log}
# calculate the last 5 logs period time of 1280 samples,
# calculate the last 5 logs period time of 160(=32*5) samples,
# the time before are burning time.
start=`tail ${log} -n 7 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
end=`tail ${log} -n 2 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
start_sec=`clock_to_seconds $start`
end_sec=`clock_to_seconds $end`
fps=`awk 'BEGIN{printf "%.2f",(1280 / ('$end_sec' - '$start_sec'))}'`
echo "Last 1280 samples start: ${start}(${start_sec} sec), end: ${end}(${end_sec} sec;" >> ${log}
fps=`awk 'BEGIN{printf "%.2f",(160 / ('$end_sec' - '$start_sec'))}'`
echo "Last 160 samples start: ${start}(${start_sec} sec), end: ${end}(${end_sec} sec;" >> ${log}
echo "FPS: $fps images/sec" 2>&1 | tee -a ${log}
}
......@@ -56,7 +57,8 @@ fi
# inference benchmark
for batchsize in 1 2 4 8 16; do
infer googlenet v1 $batchsize
infer resnet 50 $batchsize
infer vgg 19 $batchsize
infer resnet 50 $batchsize
infer googlenet v1 $batchsize
infer alexnet 2 $batchsize
done
......@@ -12,10 +12,11 @@ function train() {
config="${topology}.py"
paddle train --job=time \
--config=$config \
--use_mkldnn=False \
--use_gpu=False \
--trainer_count=$thread \
--log_period=10 \
--test_period=100 \
--log_period=3 \
--test_period=30 \
--config_args=$args \
2>&1 | tee ${log}
......@@ -36,4 +37,5 @@ for batchsize in 64 128 256; do
train vgg 19 $batchsize
train resnet 50 $batchsize
train googlenet v1 $batchsize
train alexnet 2 $batchsize
done
......@@ -7,13 +7,15 @@ num_class = 1000
batch_size = get_config_arg('batch_size', int, 64)
layer_num = get_config_arg('layer_num', int, 19)
is_infer = get_config_arg("is_infer", bool, False)
num_samples = get_config_arg('num_samples', int, 2560)
args = {
'height': height,
'width': width,
'color': True,
'num_class': num_class,
'is_infer': is_infer
'is_infer': is_infer,
'num_samples': num_samples
}
define_py_data_sources2(
"train.list" if not is_infer else None,
......
......@@ -253,9 +253,9 @@ IF(NOT PROTOBUF_FOUND)
IF(WITH_C_API)
INSTALL(DIRECTORY ${PROTOBUF_INCLUDE_DIR} DESTINATION third_party/protobuf)
IF(ANDROID)
INSTALL(FILES ${PROTOBUF_LIBRARY} DESTINATION third_party/protobuf/lib/${ANDROID_ABI})
INSTALL(FILES ${PROTOBUF_LITE_LIBRARY} DESTINATION third_party/protobuf/lib/${ANDROID_ABI})
ELSE()
INSTALL(FILES ${PROTOBUF_LIBRARY} DESTINATION third_party/protobuf/lib)
INSTALL(FILES ${PROTOBUF_LITE_LIBRARY} DESTINATION third_party/protobuf/lib)
ENDIF()
ENDIF()
......
......@@ -467,7 +467,7 @@ lambda_cost
:noindex:
square_error_cost
--------
-----------------
.. autoclass:: paddle.v2.layer.square_error_cost
:noindex:
......@@ -533,7 +533,7 @@ Miscs
=====
dropout
--------------
--------
.. autoclass:: paddle.v2.layer.dropout
:noindex:
......
......@@ -19,17 +19,17 @@ dynamic_lstm
:noindex:
data
---------
----
.. autofunction:: paddle.v2.fluid.layers.data
:noindex:
mean
---------
----
.. autofunction:: paddle.v2.fluid.layers.mean
:noindex:
mul
---------
---
.. autofunction:: paddle.v2.fluid.layers.mul
:noindex:
......@@ -45,13 +45,13 @@ elementwise_div
dropout
---------
-------
.. autofunction:: paddle.v2.fluid.layers.dropout
:noindex:
reshape
---------
--------
.. autofunction:: paddle.v2.fluid.layers.reshape
:noindex:
......@@ -81,67 +81,67 @@ transpose
sigmoid_cross_entropy_with_logits
---------
---------------------------------
.. autofunction:: paddle.v2.fluid.layers.esigmoid_cross_entropy_with_logits
:noindex:
cast
---------
----
.. autofunction:: paddle.v2.fluid.layers.cast
:noindex:
concat
---------
-------
.. autofunction:: paddle.v2.fluid.layers.concat
:noindex:
sums
---------
----
.. autofunction:: paddle.v2.fluid.layers.sums
:noindex:
linear_chain_crf
---------
----------------
.. autofunction:: paddle.v2.fluid.layers.linear_chain_crf
:noindex:
assign
---------
-------
.. autofunction:: paddle.v2.fluid.layers.embedding
:noindex:
split_lod_tensor
---------
----------------
.. autofunction:: paddle.v2.fluid.layers.split_lod_tensor
:noindex:
merge_lod_tensor
---------
----------------
.. autofunction:: paddle.v2.fluid.layers.merge_lod_tensor
:noindex:
cos_sim
---------
--------
.. autofunction:: paddle.v2.fluid.layers.cos_sim
:noindex:
cross_entropy
---------
-------------
.. autofunction:: paddle.v2.fluid.layers.cross_entropy
:noindex:
square_error_cost
---------
-----------------
.. autofunction:: paddle.v2.fluid.layers.square_error_cost
:noindex:
......@@ -153,68 +153,80 @@ accuracy
sequence_conv
---------
-------------
.. autofunction:: paddle.v2.fluid.layers.sequence_conv
:noindex:
conv2d
---------
------
.. autofunction:: paddle.v2.fluid.layers.conv2d
:noindex:
sequence_pool
---------
-------------
.. autofunction:: paddle.v2.fluid.layers.sequence_pool
:noindex:
sequence_first_step
-------------------
.. autofunction:: paddle.v2.fluid.layers.sequence_first_step
:noindex:
sequence_last_step
------------------
.. autofunction:: paddle.v2.fluid.layers.sequence_last_step
:noindex:
pool2d
---------
------
.. autofunction:: paddle.v2.fluid.layers.pool2d
:noindex:
batch_norm
---------
----------
.. autofunction:: paddle.v2.fluid.layers.batch_norm
:noindex:
beam_search_decode
---------
------------------
.. autofunction:: paddle.v2.fluid.layers.beam_search_decode
:noindex:
lod_rank_table
---------
--------------
.. autofunction:: paddle.v2.fluid.layers.lod_rank_table
:noindex:
max_sequence_len
---------
----------------
.. autofunction:: paddle.v2.fluid.layers.max_sequence_len
:noindex:
topk
---------
-----
.. autofunction:: paddle.v2.fluid.layers.topk
:noindex:
lod_tensor_to_array
---------
-------------------
.. autofunction:: paddle.v2.fluid.layers.lod_tensor_to_array
:noindex:
array_to_lod_tensor
---------
-------------------
.. autofunction:: paddle.v2.fluid.layers.array_to_lod_tensor
:noindex:
......@@ -222,26 +234,26 @@ array_to_lod_tensor
fill_constant
---------
-------------
.. autofunction:: paddle.v2.fluid.layers.fill_constant
:noindex:
fill_constant_batch_size_like
---------
-----------------------------
.. autofunction:: paddle.v2.fluid.layers.fill_constant_batch_size_like
:noindex:
ones
---------
----
.. autofunction:: paddle.v2.fluid.layers.ones
:noindex:
zeros
---------
-----
.. autofunction:: paddle.v2.fluid.layers.zeros
:noindex:
......@@ -253,14 +265,14 @@ increment
array_write
---------
-----------
.. autofunction:: paddle.v2.fluid.layers.array_write
:noindex:
create_array
---------
------------
.. autofunction:: paddle.v2.fluid.layers.create_array
:noindex:
......@@ -272,31 +284,31 @@ less_than
array_read
---------
----------
.. autofunction:: paddle.v2.fluid.layers.array_read
:noindex:
shrink_memory
---------
--------------
.. autofunction:: paddle.v2.fluid.layers.shrink_memory
:noindex:
array_length
---------
-------------
.. autofunction:: paddle.v2.fluid.layers.array_length
:noindex:
conv2d_transpose
---------
----------------
.. autofunction:: paddle.v2.fluid.layers.conv2d_transpose
:noindex:
sequence_expand
---------
---------------
.. autofunction:: paddle.v2.fluid.layers.sequence_expand
:noindex:
......@@ -308,13 +320,19 @@ lstm_unit
sequence_softmax
---------
----------------
.. autofunction:: paddle.v2.fluid.layers.sequence_softmax
:noindex:
reduce_sum
---------
----------
.. autofunction:: paddle.v2.fluid.layers.reduce_sum
:noindex:
reduce_mean
---------
.. autofunction:: paddle.v2.fluid.layers.reduce_mean
:noindex:
......@@ -3,19 +3,19 @@ Nets
===========
simple_img_conv_pool
-----------
--------------------
.. autofunction:: paddle.v2.fluid.nets.simple_img_conv_pool
:noindex:
img_conv_group
-----------
---------------
.. autofunction:: paddle.v2.fluid.nets.img_conv_group
:noindex:
sequence_conv_pool
-----------
------------------
.. autofunction:: paddle.v2.fluid.nets.sequence_conv_pool
:noindex:
......
......@@ -18,7 +18,7 @@ SGDOptimizer
MomentumOptimizer
-----------
-----------------
.. automodule:: paddle.v2.fluid.optimizer
:members: MomentumOptimizer
:noindex:
......@@ -26,14 +26,14 @@ MomentumOptimizer
AdagradOptimizer
-----------
----------------
.. automodule:: paddle.v2.fluid.optimizer
:members: AdagradOptimizer
:noindex:
AdamOptimizer
-----------
-------------
.. automodule:: paddle.v2.fluid.optimizer
:members: AdamOptimizer
:noindex:
......@@ -47,7 +47,7 @@ AdamaxOptimizer
DecayedAdagradOptimizer
-----------
-----------------------
.. automodule:: paddle.v2.fluid.optimizer
:members: DecayedAdagradOptimizer
:noindex:
......
......@@ -3,14 +3,14 @@ Regularizer
===========
WeightDecayRegularizer
-----------
----------------------
.. automodule:: paddle.v2.fluid.regularizer
:members: WeightDecayRegularizer
:noindex:
L2DecayRegularizer
-----------
------------------
.. automodule:: paddle.v2.fluid.regularizer
:members: L2DecayRegularizer
:noindex:
......@@ -18,7 +18,7 @@ L2DecayRegularizer
L1DecayRegularizer
-----------
-------------------
.. automodule:: paddle.v2.fluid.regularizer
:members: L1DecayRegularizer
......
## Problem
In PaddlePaddle's [Design](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/switch_kernel.md), one Operator may have multiple kernels. Users may have some personal preference to choose a certain type of kernel for an operator, such as `force_cpu` to choose a CPU kernel, `use_cudnn` to choose a CUDNN kernel, we need to provide a way for users to do this.
In the current design, we use KernelType to describe one kernel.
```cpp
struct KernelType {
Place place_;
DataType data_type_;
LayoutType layout_;
};
```
`place_` `data_type_` and `layout_` can be got from the input tensors of the operator, `GetActualKernelType(inputs)` use inputs to infer the proper kernel key that fit the incoming data, but users can not directly configure it.
The [design](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/switch_kernel.md) also provides a virtual method `GetExpectedKernelType` that user can overload and use to choose the KernelType they want to use.
So we should send the information user defined in proto to `GetExpectedKernelType` for choosing a kernel.
The problem is, how should we define and send the information for `GetExpectedKernelType` to use?
## Solution
### Potential choice
1. Do nothing, let the user add the information they want to operator‘s attribute and get them inside `GetExpectedKernelType`, this can work properly. But there is a little problem that users may define many kinds of hints for the same purpose, such as `force_cpu`, `use_cpu`, `cpu_kernel` to choose CPU kernel, and `use_cudnn`, `force_cudnn`, `cudnn_kernel` to choose CUDNN kernel.
2. Pre-define all the needed option and use a single attr key such as `kernel_hint` for the user, this is not so flexible if the user wants to define some more kind of hint.
### Final choice
To provide enough flexibility while avoiding confusion definition, we can define some global constants for these attribute names, such as `force_cpu`, `use_cudnn`, `use_mkldnn` for a user to choose.
In C++
```cpp
const std::string kForceCPU = "force_cpu";
const std::string kUseCUDNN = "use_cudnn";
const std::string kUseMKLDNN = "use_mkldnn";
KernelType GetExpectedKernelType() {
if (Attr<bool>(kForceCPU)) {
return KernelType(CPUPlace, ...)
} else {
...
}
}
```
In Python code
```python
FORCE_CPU = core.kForceCPU()
def xx_layer(..., force_cpu=false):
layer_helper = LayerHelper(...)
layer_helper.append_op(
type="xx",
attr={FORCE_CPU: force_cpu})
```
# Design Doc: The Keys of Operator Kernel Type
## Problem
An operator can have different kernel implementations, and each operator will have a map to store the related kernels. Fluid uses `OpKernelType` as a key to identify a unique Kernel. Before an operator runs, an certain kernel must be chosen by a key of `OpKernelType`. Currently, `OpKernelType` is defined as follows:
```cpp
struct OpKernelType {
platform::Place place_;
proto::DataType data_type_;
};
```
For more details, please refer to [codes](https://github.com/PaddlePaddle/Paddle/blob/2d5ec16bc8a09fb8e0f62c89b116b0cd1d333907/paddle/framework/operator.h#L348-L374) in github.
It contains two keys, `Place` and `DataType`. And these two keys will be hashed to a unique key to represent a certain type of kernel. However, these two keys are not enough. We need a more complete representation of `OpKernelType`.
We often implement a kernel of an operator with some computing library in certain device(place). Please remind that computing library and device are not one-to-one corresponding. A device can have a lot of computing libraries and a computing library can also support several devices.
For example, Eigen library can support Nvidia GPU/AMD GPU/CPU. And MKLDNN library can support Intel CPU/Intel FPGA. Both `Place` and `Library` should be a key of `OpKernelType`.
It's obvious that different DataTypes, like fp64/fp32/int8 will have different kernels. But the data layout of a Tensor will also lead to different implementation. Please refer to the batch norm operator [kernels](https://github.com/PaddlePaddle/Paddle/blob/a948fac4d0ad7e0412d373b8aabeb711c2899563/paddle/operators/batch_norm_op.cc#L180-L209). Data Layout should also be taken into consideration.
## Solution
There are four keys to determine a kernel type of an operator: `Place`/`Library`/`DataType`/`Layout`.
```cpp
struct OpKernelType {
platform::Place place_;
platform::Library library_;
proto::DataType data_type_;
framework::Layout layout_;
};
```
Following is the details:
### Place
`Place` is defined as follows:
```cpp
typedef boost::variant<CUDAPlace, ROCmPlace, FPGAPlace, CPUPlace> Place;
```
`Place` is to represent the device memory where data is locating.
### Library
One operator kernel is usually implemented based on one library. `Library` is defined as a enum variable:
```cpp
enum Library { Plain, MKLDNN, CUDNN };
```
We use `Plain` enumerator to represent default library. Since most operators in Fluid are implemented based on `Eigen` library, we take `Eigen` library as the `Plain` enumerator.
A library usually has a corresponding `DeviceContext` which contains some handles needed by computation. Fluid now have two default DeviceContexts in CPU and CUDA, `CPUDeviceContext` and `CUDADeviceContext`. `CPUDeviceContext` contains a Eigen library handle and `CDUADeviceContext` contains a Eigen library handle and cuBLAS handle.
If we want to support new Library, a new enumerator need to be added to `Library` and a new corresponding `LibraryDeviceContext` will be created.
### DataType
`DataType` is defined in [framework.proto](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto). Currently, int32/int64/fp32/fp64 are supported.
### Layout
Actually, a Tensor is a view of a block of memory. Besides a pointer to the memory, we also have to get some other descriptions of this block of memory, such as shape(ddim), stride, and layout.
Different layout leads to different implementation of operator kernel. There are mainly 4 principles we have to follow to support layout in our fluid framework.
- We take layout as a data member of Tensor. Layout is actually a enum variable. If fluid is built with MKLDNN, then, the memory format in MKLDNN will be added into this enum variable too.
- Users have to set layout for input data. And some operators like fill_constant/random, also have to set layout of generating data. Of course, we can have some default layout, like NCHW.
- The inference of Layout is at run-time, not compile-time.
- Every operator have to implement different kernels for different layouts. Let's take MKLDNN as an example, if we want to implement a MKLDNN convolution operator, we have to realize all the kernels for different layout, list at [here](http://01org.github.io/mkl-dnn/structmkldnn_1_1memory.html). And we will have a special macro to do registering kernels for MKLDNN operators.
`Layout` is also defined as a enum variable:
```cpp
enum Layout {
kNCHW,
kNHWC,
#ifdef PADDLE_WITH_MKLDNN
knChw8c
...
#endif
};
```
......@@ -70,13 +70,13 @@ PaddlePaddle编译需要使用到下面的依赖(包含但不限于),其
:header: "依赖", "版本", "说明"
:widths: 10, 15, 30
"CMake", ">=3.5", ""
"CMake", ">=3.2", ""
"GCC", "4.8.2", "推荐使用CentOS的devtools2"
"Python", "2.7.x", "依赖libpython2.7.so"
"pip", ">=9.0", ""
"numpy", "", ""
"Python", "2.7.x", "依赖libpython2.7.so"
"pip", ">=9.0", ""
"numpy", "", ""
"SWIG", ">=2.0", ""
"Go", ">=1.8", "可选"
"Go", ">=1.8", "可选"
.. _build_options:
......
......@@ -76,13 +76,13 @@ will be downloaded automatically.
:header: "Dependency", "Version", "Description"
:widths: 10, 15, 30
"CMake", ">=3.5", ""
"CMake", ">=3.2", ""
"GCC", "4.8.2", "Recommend devtools2 for CentOS"
"Python", "2.7.x", "Need libpython2.7.so"
"pip", ">=9.0", ""
"numpy", "", ""
"Python", "2.7.x", "Need libpython2.7.so"
"pip", ">=9.0", ""
"numpy", "", ""
"SWIG", ">=2.0", ""
"Go", ">=1.8", "Optional"
"Go", ">=1.8", "Optional"
.. _build_options:
......
......@@ -37,11 +37,11 @@ PaddlePaddle可以使用常用的Python包管理工具
:header: "版本说明", "cp27-cp27mu", "cp27-cp27m", "C-API"
:widths: 1, 3, 3, 3
"cpu_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cpu_avx_openblas", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "暂无"
"cuda7.5_cudnn5_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cuda8.0_cudnn5_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cuda8.0_cudnn7_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cpu_avx_mkl", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cpu_avx_openblas", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "暂无"
"cuda7.5_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
.. _pip_dependency:
......
......@@ -40,11 +40,11 @@ If the links below shows up the login form, just click "Log in as guest" to star
:header: "version", "cp27-cp27mu", "cp27-cp27m", "C-API"
:widths: 1, 3, 3, 3
"cpu_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cpu_avx_openblas", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "Not Available"
"cuda7.5_cudnn5_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cuda8.0_cudnn5_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cuda8.0_cudnn7_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cpu_avx_mkl", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cpu_avx_openblas", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "Not Available"
"cuda7.5_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
.. _pip_dependency:
......
......@@ -42,7 +42,7 @@ static std::unordered_set<std::string>& CtrlFlowOps() {
static inline std::unique_ptr<OperatorBase> CreateGradOp(
const OperatorBase& op, const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var) {
OpDescBind op_desc;
OpDesc op_desc;
op_desc.SetInputMap(op.Inputs());
op_desc.SetOutputMap(op.Outputs());
op_desc.SetType(op.Type());
......@@ -53,7 +53,7 @@ static inline std::unique_ptr<OperatorBase> CreateGradOp(
grad_ops.reserve(grad_descs.size());
std::transform(grad_descs.begin(), grad_descs.end(),
std::back_inserter(grad_ops),
[](const std::unique_ptr<OpDescBind>& grad_desc) {
[](const std::unique_ptr<OpDesc>& grad_desc) {
return OpRegistry::CreateOp(*grad_desc);
});
PADDLE_ENFORCE(!grad_ops.empty());
......@@ -217,7 +217,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
// If part of input gradient of that operator is not calculated, fill
// zero variables to that input gradient.
net->AppendOp(OpRegistry::CreateOp("fill_zeros_like", {{"X", {prefix}}},
{{"Y", {grad_input}}},
{{"Out", {grad_input}}},
AttributeMap{}));
}
return false;
......@@ -296,7 +296,7 @@ static std::string FwdName(const std::string& grad_name) {
static void CreateGradVarInBlock(
size_t grad_op_start_index,
const std::unordered_map<std::string, std::string>& param_name_map,
BlockDescBind* block_desc,
BlockDesc* block_desc,
std::unordered_map<std::string, GradVarInfo>* grad_var_record) {
auto ops = block_desc->AllOps();
for (size_t op_index = grad_op_start_index; op_index < ops.size();
......@@ -350,12 +350,11 @@ static void CreateGradVarInBlock(
}
}
std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
const OpDescBind* op_desc, std::unordered_set<std::string>* no_grad_vars,
std::vector<std::unique_ptr<OpDesc>> MakeOpGrad(
const OpDesc* op_desc, std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var,
const std::vector<BlockDescBind*>& grad_block =
std::vector<BlockDescBind*>()) {
std::vector<std::unique_ptr<OpDescBind>> grad_op_descs;
const std::vector<BlockDesc*>& grad_block = std::vector<BlockDesc*>()) {
std::vector<std::unique_ptr<OpDesc>> grad_op_descs;
// All input gradients of forwarding operator do not need to calculate.
const std::vector<std::string>& inputs = op_desc->InputArgumentNames();
if (AllGradInSet(inputs, *no_grad_vars)) {
......@@ -386,7 +385,7 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
.Get(op_desc->Type())
.GradOpMaker()(*op_desc, *no_grad_vars, grad_to_var, grad_block);
std::list<std::unique_ptr<OpDescBind>> pending_fill_zeros_ops;
std::list<std::unique_ptr<OpDesc>> pending_fill_zeros_ops;
for (auto& desc : grad_op_descs) {
for (const std::string& in_name : desc->InputArgumentNames()) {
if (no_grad_vars->count(in_name)) {
......@@ -394,9 +393,9 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1);
std::string new_name = prefix + kZeroVarSuffix;
desc->Rename(in_name, new_name);
std::unique_ptr<OpDescBind> fill_zeros_op(
new OpDescBind("fill_zeros_like", {{"X", {prefix}}},
{{"Y", {new_name}}}, AttributeMap{}));
std::unique_ptr<OpDesc> fill_zeros_op(
new OpDesc("fill_zeros_like", {{"X", {prefix}}},
{{"Out", {new_name}}}, AttributeMap{}));
pending_fill_zeros_ops.push_back(std::move(fill_zeros_op));
}
}
......@@ -408,34 +407,33 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
return grad_op_descs;
}
static BlockDescBind* CreateStepBlock(
ProgramDescBind& program_desc,
std::unordered_set<std::string>* no_grad_vars,
static BlockDesc* CreateStepBlock(
ProgramDesc& program_desc, std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var,
int step_block_idx);
std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
ProgramDescBind& program_desc, int block_idx,
std::vector<std::unique_ptr<OpDesc>> MakeBlockBackward(
ProgramDesc& program_desc, int block_idx,
std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var) {
VLOG(5) << "MakeBlockBackward";
BlockDescBind* cur_block = program_desc.MutableBlock(block_idx);
std::vector<OpDescBind*> op_descs = cur_block->AllOps();
BlockDesc* cur_block = program_desc.MutableBlock(block_idx);
std::vector<OpDesc*> op_descs = cur_block->AllOps();
std::unordered_map<std::string, std::vector<size_t>> dup_out_ops;
size_t grad_desc_idx = 0;
std::vector<std::unique_ptr<OpDescBind>> backward_descs;
std::vector<std::unique_ptr<OpDesc>> backward_descs;
for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) {
VLOG(5) << "Making backward " << (*it)->Type() << " op";
std::vector<std::unique_ptr<OpDescBind>> op_grads;
std::vector<std::unique_ptr<OpDesc>> op_grads;
if ((*it)->Type() == "recurrent" || (*it)->Type() == "while") {
int step_block_idx = (*it)->GetBlockAttr("sub_block");
BlockDescBind* backward_block = CreateStepBlock(
program_desc, no_grad_vars, grad_to_var, step_block_idx);
BlockDesc* backward_block = CreateStepBlock(program_desc, no_grad_vars,
grad_to_var, step_block_idx);
op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block});
} else if ((*it)->Type() == "conditional_block") {
BlockDescBind* backward_block =
BlockDesc* backward_block =
CreateStepBlock(program_desc, no_grad_vars, grad_to_var,
(*it)->GetBlockAttr("sub_block"));
op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block});
......@@ -463,14 +461,14 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
}
++grad_desc_idx;
}
std::transform(
op_grads.begin(), op_grads.end(), std::back_inserter(backward_descs),
[](std::unique_ptr<OpDescBind>& ptr) { return std::move(ptr); });
std::transform(op_grads.begin(), op_grads.end(),
std::back_inserter(backward_descs),
[](std::unique_ptr<OpDesc>& ptr) { return std::move(ptr); });
}
VLOG(5) << "Appending Sums";
// Check whether some variables are written more than once
std::list<std::pair<size_t, std::unique_ptr<OpDescBind>>> pending_sum_ops;
std::list<std::pair<size_t, std::unique_ptr<OpDesc>>> pending_sum_ops;
for (const auto& dup : dup_out_ops) {
const std::string& out_name = dup.first;
const std::vector<size_t> dup_op = dup.second;
......@@ -486,16 +484,15 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
sum_op_inputs.emplace_back(new_name);
next_g_name = sum_op_inputs.back();
}
std::unique_ptr<OpDescBind> sum_op(
new OpDescBind("sum", {{"X", sum_op_inputs}}, {{"Out", {out_name}}},
std::unique_ptr<OpDesc> sum_op(new OpDesc("sum", {{"X", sum_op_inputs}},
{{"Out", {out_name}}},
AttributeMap{}));
pending_sum_ops.push_back({dup_op.back(), std::move(sum_op)});
}
}
pending_sum_ops.sort(
[](const std::pair<size_t, std::unique_ptr<OpDescBind>>& a,
const std::pair<size_t, std::unique_ptr<OpDescBind>>& b) {
pending_sum_ops.sort([](const std::pair<size_t, std::unique_ptr<OpDesc>>& a,
const std::pair<size_t, std::unique_ptr<OpDesc>>& b) {
return a.first > b.first;
});
for (auto& p : pending_sum_ops) {
......@@ -508,14 +505,13 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
return backward_descs;
}
static BlockDescBind* CreateStepBlock(
ProgramDescBind& program_desc,
std::unordered_set<std::string>* no_grad_vars,
static BlockDesc* CreateStepBlock(
ProgramDesc& program_desc, std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var,
int step_block_idx) {
auto backward_block_op_descs = MakeBlockBackward(program_desc, step_block_idx,
no_grad_vars, grad_to_var);
BlockDescBind* backward_block =
BlockDesc* backward_block =
program_desc.AppendBlock(*program_desc.MutableBlock(step_block_idx));
for (auto& ptr : backward_block_op_descs) {
backward_block->AppendAllocatedOp(move(ptr));
......@@ -524,7 +520,7 @@ static BlockDescBind* CreateStepBlock(
}
ParamGradInfoMap AppendBackward(
ProgramDescBind& program_desc, const VarDescBind& target,
ProgramDesc& program_desc, const VarDesc& target,
const std::unordered_set<std::string>& no_grad_vars) {
std::unordered_set<std::string> no_grad_var_names;
no_grad_var_names.reserve(no_grad_vars.size() + 1);
......@@ -541,8 +537,8 @@ ParamGradInfoMap AppendBackward(
PADDLE_ENFORCE(is_scalar, "target should be scalar");
VLOG(3) << "backward from loss=" << target.Name()
<< " data_type=" << target.GetDataType();
std::unique_ptr<OpDescBind> fill_one_op(
new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}},
std::unique_ptr<OpDesc> fill_one_op(
new OpDesc("fill_constant", {}, {{"Out", {fill_one_op_out}}},
{{"shape", std::vector<int>{1}},
{"value", static_cast<float>(1.0)},
{"dtype", target.GetDataType()}}));
......
......@@ -49,7 +49,7 @@ using ParamGradInfoMap = std::unordered_map<std::string /*fwd_var_name*/,
GradVarInfo /*grad_var_info*/>;
ParamGradInfoMap AppendBackward(
ProgramDescBind& program_desc, const VarDescBind& target,
ProgramDesc& program_desc, const VarDesc& target,
const std::unordered_set<std::string>& no_grad_vars);
} // namespace framework
......
......@@ -58,13 +58,13 @@ class RowWiseAddGradMaker : public SingleGradOpDescMaker {
using SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<OpDescBind> Apply() const override {
auto grad_op = new OpDescBind();
std::unique_ptr<OpDesc> Apply() const override {
auto grad_op = new OpDesc();
grad_op->SetInput(GradVarName("Out"), OutputGrad("Out"));
grad_op->SetOutput(GradVarName("X"), InputGrad("X"));
grad_op->SetOutput(GradVarName("b"), InputGrad("b"));
grad_op->SetType("rowwise_add_grad");
return std::unique_ptr<OpDescBind>(grad_op);
return std::unique_ptr<OpDesc>(grad_op);
}
};
......@@ -159,7 +159,7 @@ class FillZeroOpMaker : public OpProtoAndCheckerMaker {
FillZeroOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "x");
AddOutput("Y", "out");
AddOutput("Out", "out");
AddComment("");
}
};
......@@ -190,11 +190,11 @@ class MinusGradOpDescMaker : public GradOpDescMakerBase {
public:
using GradOpDescMakerBase::GradOpDescMakerBase;
std::vector<std::unique_ptr<OpDescBind>> operator()() const override {
std::vector<std::unique_ptr<OpDescBind>> retv;
std::vector<std::unique_ptr<OpDesc>> operator()() const override {
std::vector<std::unique_ptr<OpDesc>> retv;
auto x_g = InputGrad("X");
if (!x_g.empty()) {
auto *op_desc = new OpDescBind();
auto *op_desc = new OpDesc();
op_desc->SetType("scale");
op_desc->SetInput("X", OutputGrad("Out"));
op_desc->SetOutput("Out", x_g);
......@@ -204,7 +204,7 @@ class MinusGradOpDescMaker : public GradOpDescMakerBase {
auto y_g = InputGrad("Y");
if (!y_g.empty()) {
auto *op_desc = new OpDescBind();
auto *op_desc = new OpDesc();
op_desc->SetType("scale");
op_desc->SetInput("X", OutputGrad("Out"));
op_desc->SetOutput("Out", y_g);
......@@ -430,8 +430,8 @@ TEST(Backward, op_part_of_output_are_not_need) {
ASSERT_EQ("fill_zeros_like", fill_zero.Type());
ASSERT_EQ(1UL, fill_zero.Inputs("X").size());
ASSERT_EQ("Z", fill_zero.Input("X"));
ASSERT_EQ(1UL, fill_zero.Outputs("Y").size());
ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Y"));
ASSERT_EQ(1UL, fill_zero.Outputs("Out").size());
ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Out"));
auto &d_many_out = *net->ops_[1];
ASSERT_EQ("many_output_op_grad", d_many_out.Type());
......@@ -505,25 +505,25 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
}
TEST(Backward, simple_single_op) {
f::ProgramDescBind program;
f::BlockDescBind *block = program.MutableBlock(0);
f::ProgramDesc program;
f::BlockDesc *block = program.MutableBlock(0);
f::OpDescBind *op = block->AppendOp();
f::OpDesc *op = block->AppendOp();
op->SetType("rowwise_add");
op->SetInput("X", {"x"});
op->SetInput("b", {"b"});
op->SetOutput("Out", {"out"});
auto target = f::VarDescBind("out");
auto target = f::VarDesc("out");
target.SetShape({1});
auto var_to_grad =
AppendBackward(program, target, std::unordered_set<std::string>{});
ASSERT_EQ(block->AllOps().size(), 3UL);
f::OpDescBind *fill_op = block->AllOps()[1];
f::OpDesc *fill_op = block->AllOps()[1];
EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op = block->AllOps()[2];
f::OpDesc *grad_op = block->AllOps()[2];
EXPECT_EQ(grad_op->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op->InputNames().size(), 1UL);
ASSERT_EQ(grad_op->OutputNames().size(), 2UL);
......@@ -543,16 +543,16 @@ TEST(Backward, simple_single_op) {
}
TEST(Backward, default_attribute) {
f::ProgramDescBind program;
f::BlockDescBind *block = program.MutableBlock(0);
f::OpDescBind *op = block->AppendOp();
f::ProgramDesc program;
f::BlockDesc *block = program.MutableBlock(0);
f::OpDesc *op = block->AppendOp();
op->SetType("mul");
op->SetInput("X", {"x"});
op->SetInput("Y", {"y"});
op->SetOutput("Out", {"out"});
op->CheckAttrs();
auto target = f::VarDescBind("out");
auto target = f::VarDesc("out");
target.SetShape({1});
AppendBackward(program, target, std::unordered_set<std::string>{});
......@@ -560,47 +560,47 @@ TEST(Backward, default_attribute) {
EXPECT_EQ(boost::get<int>(op->GetAttr("x_num_col_dims")), 1);
EXPECT_EQ(boost::get<int>(op->GetAttr("y_num_col_dims")), 1);
f::OpDescBind *fill_op = block->AllOps()[1];
f::OpDesc *fill_op = block->AllOps()[1];
EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op = block->AllOps()[2];
f::OpDesc *grad_op = block->AllOps()[2];
ASSERT_EQ(grad_op->Type(), "mul_grad");
EXPECT_EQ(boost::get<int>(grad_op->GetAttr("x_num_col_dims")), 1);
EXPECT_EQ(boost::get<int>(grad_op->GetAttr("y_num_col_dims")), 1);
}
TEST(Backward, simple_mult_op) {
f::ProgramDescBind program;
f::BlockDescBind *block = program.MutableBlock(0);
f::OpDescBind *op1 = block->AppendOp();
f::ProgramDesc program;
f::BlockDesc *block = program.MutableBlock(0);
f::OpDesc *op1 = block->AppendOp();
op1->SetType("rowwise_add");
op1->SetInput("X", {"x1"});
op1->SetInput("b", {"b1"});
op1->SetOutput("Out", {"out1"});
f::OpDescBind *op2 = block->AppendOp();
f::OpDesc *op2 = block->AppendOp();
op2->SetType("mul");
op2->SetInput("X", {"out1"});
op2->SetInput("Y", {"y2"});
op2->SetOutput("Out", {"out2"});
f::OpDescBind *op3 = block->AppendOp();
f::OpDesc *op3 = block->AppendOp();
op3->SetType("rowwise_add");
op3->SetInput("X", {"out2"});
op3->SetInput("b", {"b3"});
op3->SetOutput("Out", {"out3"});
auto target = f::VarDescBind("out3");
auto target = f::VarDesc("out3");
target.SetShape({1});
size_t forward_len = block->AllOps().size();
auto var_to_grad =
AppendBackward(program, target, std::unordered_set<std::string>{});
ASSERT_EQ(block->AllOps().size(), 6UL + 1);
f::OpDescBind *fill_op = block->AllOps()[forward_len];
f::OpDesc *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op1 = block->AllOps()[6];
f::OpDesc *grad_op1 = block->AllOps()[6];
EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
......@@ -611,7 +611,7 @@ TEST(Backward, simple_mult_op) {
EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b1")}));
f::OpDescBind *grad_op2 = block->AllOps()[5];
f::OpDesc *grad_op2 = block->AllOps()[5];
EXPECT_EQ(grad_op2->Type(), "mul_grad");
ASSERT_EQ(grad_op2->InputNames().size(), 4UL);
ASSERT_EQ(grad_op2->OutputNames().size(), 2UL);
......@@ -625,7 +625,7 @@ TEST(Backward, simple_mult_op) {
EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")),
std::vector<std::string>({f::GradVarName("y2")}));
f::OpDescBind *grad_op3 = block->AllOps()[4];
f::OpDesc *grad_op3 = block->AllOps()[4];
EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
......@@ -655,42 +655,42 @@ TEST(Backward, simple_mult_op) {
}
TEST(Backward, intermedia_var_no_grad) {
f::ProgramDescBind program;
f::BlockDescBind *block = program.MutableBlock(0);
f::OpDescBind *op1 = block->AppendOp();
f::ProgramDesc program;
f::BlockDesc *block = program.MutableBlock(0);
f::OpDesc *op1 = block->AppendOp();
op1->SetType("rowwise_add");
op1->SetInput("X", {"x1"});
op1->SetInput("b", {"b1"});
op1->SetOutput("Out", {"out1"});
f::OpDescBind *op2 = block->AppendOp();
f::OpDesc *op2 = block->AppendOp();
op2->SetType("mul");
op2->SetInput("X", {"x2"});
op2->SetInput("Y", {"y2"});
op2->SetOutput("Out", {"out2"});
f::OpDescBind *op3 = block->AppendOp();
f::OpDesc *op3 = block->AppendOp();
op3->SetType("rowwise_add");
op3->SetInput("X", {"out2"});
op3->SetInput("b", {"b3"});
op3->SetOutput("Out", {"out3"});
f::OpDescBind *op4 = block->AppendOp();
f::OpDesc *op4 = block->AppendOp();
op4->SetType("mul");
op4->SetInput("X", {"out1"});
op4->SetInput("Y", {"out3"});
op4->SetOutput("Out", {"out4"});
auto target = f::VarDescBind("out4");
auto target = f::VarDesc("out4");
target.SetShape({1});
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"out3"});
ASSERT_EQ(block->AllOps().size(), 7UL);
f::OpDescBind *fill_op = block->AllOps()[forward_len];
f::OpDesc *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op1 = block->AllOps()[6];
f::OpDesc *grad_op1 = block->AllOps()[6];
EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
......@@ -701,7 +701,7 @@ TEST(Backward, intermedia_var_no_grad) {
EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b1")}));
f::OpDescBind *grad_op4 = block->AllOps()[5];
f::OpDesc *grad_op4 = block->AllOps()[5];
EXPECT_EQ(grad_op4->Type(), "mul_grad");
ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
......@@ -726,32 +726,32 @@ TEST(Backward, intermedia_var_no_grad) {
}
TEST(Backward, var_no_grad) {
f::ProgramDescBind program;
f::BlockDescBind *block = program.MutableBlock(0);
f::OpDescBind *op1 = block->AppendOp();
f::ProgramDesc program;
f::BlockDesc *block = program.MutableBlock(0);
f::OpDesc *op1 = block->AppendOp();
op1->SetType("mult_in_out");
op1->SetInput("X", {"x1"});
op1->SetInput("H", {"h1"});
op1->SetOutput("Y", {"y1"});
op1->SetOutput("Z", {"z1"});
f::OpDescBind *op2 = block->AppendOp();
f::OpDesc *op2 = block->AppendOp();
op2->SetType("mult_in_out");
op2->SetInput("X", {"y1"});
op2->SetInput("H", {"z1"});
op2->SetOutput("Y", {"y2"});
op2->SetOutput("Z", {"z2"});
auto target = f::VarDescBind("z2");
auto target = f::VarDesc("z2");
target.SetShape({1});
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"z1"});
ASSERT_EQ(block->AllOps().size(), 6UL);
f::OpDescBind *fill_op = block->AllOps()[forward_len];
f::OpDesc *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op2 = block->AllOps()[3];
f::OpDesc *grad_op2 = block->AllOps()[3];
ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad");
ASSERT_EQ(grad_op2->InputNames().size(), 6UL);
ASSERT_EQ(grad_op2->OutputNames().size(), 2UL);
......@@ -767,15 +767,15 @@ TEST(Backward, var_no_grad) {
std::vector<std::string>({f::GradVarName("y1")}));
EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), std::vector<std::string>());
f::OpDescBind *fill_zero_op = block->AllOps()[4];
f::OpDesc *fill_zero_op = block->AllOps()[4];
ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like");
ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL);
ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL);
EXPECT_EQ(fill_zero_op->Input("X"), std::vector<std::string>({"z1"}));
EXPECT_EQ(fill_zero_op->Output("Y"),
EXPECT_EQ(fill_zero_op->Output("Out"),
std::vector<std::string>({std::string("z1") + f::kZeroVarSuffix}));
f::OpDescBind *grad_op1 = block->AllOps()[5];
f::OpDesc *grad_op1 = block->AllOps()[5];
ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 6UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
......@@ -803,37 +803,37 @@ TEST(Backward, var_no_grad) {
}
TEST(Backward, shared_var) {
f::ProgramDescBind program;
f::BlockDescBind *block = program.MutableBlock(0);
f::OpDescBind *op1 = block->AppendOp();
f::ProgramDesc program;
f::BlockDesc *block = program.MutableBlock(0);
f::OpDesc *op1 = block->AppendOp();
op1->SetType("rowwise_add");
op1->SetInput("X", {"x1"});
op1->SetInput("b", {"b1"});
op1->SetOutput("Out", {"out1"});
f::OpDescBind *op2 = block->AppendOp();
f::OpDesc *op2 = block->AppendOp();
op2->SetType("mul");
op2->SetInput("X", {"out1"});
op2->SetInput("Y", {"y2"});
op2->SetOutput("Out", {"out2"});
f::OpDescBind *op3 = block->AppendOp();
f::OpDesc *op3 = block->AppendOp();
op3->SetType("rowwise_add");
op3->SetInput("X", {"out1"});
op3->SetInput("b", {"b3"});
op3->SetOutput("Out", {"out3"});
auto target = f::VarDescBind("out3");
auto target = f::VarDesc("out3");
target.SetShape({1});
size_t forward_len = block->AllOps().size();
auto var_to_grad =
AppendBackward(program, target, std::unordered_set<std::string>{});
ASSERT_EQ(block->AllOps().size(), 8UL);
f::OpDescBind *fill_op = block->AllOps()[forward_len];
f::OpDesc *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op3 = block->AllOps()[4];
f::OpDesc *grad_op3 = block->AllOps()[4];
ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
......@@ -844,7 +844,7 @@ TEST(Backward, shared_var) {
EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b3")}));
f::OpDescBind *grad_op4 = block->AllOps()[5];
f::OpDesc *grad_op4 = block->AllOps()[5];
ASSERT_EQ(grad_op4->Type(), "mul_grad");
ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
......@@ -858,7 +858,7 @@ TEST(Backward, shared_var) {
EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")),
std::vector<std::string>({f::GradVarName("y2")}));
f::OpDescBind *sum_op = block->AllOps()[6];
f::OpDesc *sum_op = block->AllOps()[6];
ASSERT_EQ(sum_op->Type(), "sum");
ASSERT_EQ(sum_op->InputNames().size(), 1UL);
ASSERT_EQ(sum_op->OutputNames().size(), 1UL);
......@@ -868,7 +868,7 @@ TEST(Backward, shared_var) {
EXPECT_EQ(sum_op->Output("Out"),
std::vector<std::string>({f::GradVarName("out1")}));
f::OpDescBind *grad_op1 = block->AllOps()[7];
f::OpDesc *grad_op1 = block->AllOps()[7];
ASSERT_EQ(grad_op1->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
......@@ -895,19 +895,19 @@ TEST(Backward, shared_var) {
}
TEST(Backward, half_backward) {
f::ProgramDescBind program;
f::BlockDescBind *block = program.MutableBlock(0);
f::ProgramDesc program;
f::BlockDesc *block = program.MutableBlock(0);
auto *op1 = block->AppendOp();
op1->SetType("minus");
op1->SetInput("X", {"a"});
op1->SetInput("Y", {"b"});
op1->SetOutput("Out", {"out"});
auto target = f::VarDescBind("out");
auto target = f::VarDesc("out");
target.SetShape({1});
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"b"});
f::OpDescBind *fill_op = block->AllOps()[forward_len];
f::OpDesc *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant");
auto ops = block->AllOps();
ASSERT_EQ(3UL, ops.size());
......
......@@ -19,18 +19,18 @@ limitations under the License. */
namespace paddle {
namespace framework {
VarDescBind *BlockDescBind::Var(const std::string &name) {
VarDesc *BlockDesc::Var(const std::string &name) {
auto it = vars_.find(name);
if (it != vars_.end()) {
return it->second.get();
}
need_update_ = true;
auto *var = new VarDescBind(name);
auto *var = new VarDesc(name);
vars_[name].reset(var);
return var;
}
VarDescBind *BlockDescBind::FindVar(const std::string &name) const {
VarDesc *BlockDesc::FindVar(const std::string &name) const {
auto it = vars_.find(name);
if (it == vars_.end()) {
return nullptr;
......@@ -38,11 +38,11 @@ VarDescBind *BlockDescBind::FindVar(const std::string &name) const {
return it->second.get();
}
bool BlockDescBind::HasVar(const std::string &name) const {
bool BlockDesc::HasVar(const std::string &name) const {
return vars_.find(name) != vars_.end();
}
VarDescBind *BlockDescBind::FindVarRecursive(const std::string &name) const {
VarDesc *BlockDesc::FindVarRecursive(const std::string &name) const {
if (name == kEmptyVarName) return nullptr;
auto it = vars_.find(name);
......@@ -53,53 +53,67 @@ VarDescBind *BlockDescBind::FindVarRecursive(const std::string &name) const {
return it->second.get();
}
VarDescBind *BlockDescBind::FindRecursiveOrCreateVar(
const std::string &name_bytes) {
VarDescBind *res = FindVarRecursive(name_bytes);
VarDesc *BlockDesc::FindRecursiveOrCreateVar(const std::string &name_bytes) {
VarDesc *res = FindVarRecursive(name_bytes);
if (res == nullptr) {
res = Var(name_bytes);
}
return res;
}
bool BlockDescBind::HasVarRecursive(const std::string &name) const {
bool BlockDesc::HasVarRecursive(const std::string &name) const {
return FindVarRecursive(name) != nullptr;
}
std::vector<VarDescBind *> BlockDescBind::AllVars() const {
std::vector<VarDescBind *> res;
std::vector<VarDesc *> BlockDesc::AllVars() const {
std::vector<VarDesc *> res;
for (const auto &p : vars_) {
res.push_back(p.second.get());
}
return res;
}
OpDescBind *BlockDescBind::AppendOp() {
OpDesc *BlockDesc::AppendOp() {
need_update_ = true;
ops_.emplace_back(new OpDescBind());
ops_.emplace_back(new OpDesc());
return ops_.back().get();
}
void BlockDescBind::AppendAllocatedOp(std::unique_ptr<OpDescBind> &&op_desc) {
void BlockDesc::AppendAllocatedOp(std::unique_ptr<OpDesc> &&op_desc) {
need_update_ = true;
ops_.emplace_back(std::move(op_desc));
}
OpDescBind *BlockDescBind::PrependOp() {
OpDesc *BlockDesc::PrependOp() {
need_update_ = true;
ops_.emplace_front(new OpDescBind());
ops_.emplace_front(new OpDesc());
return ops_.front().get();
}
std::vector<OpDescBind *> BlockDescBind::AllOps() const {
std::vector<OpDescBind *> res;
void BlockDesc::RemoveOp(size_t s, size_t e) {
if (ops_.begin() + s == ops_.end() || ops_.begin() + e == ops_.end()) {
return;
}
need_update_ = true;
for (auto it = ops_.begin() + s; it != ops_.begin() + e; it++) {
auto names = (*it)->InputArgumentNames();
for (auto n : names) {
// TODO(typhoonzero): delete vars if no other op use it.
VLOG(3) << "deleting var " << n;
}
}
ops_.erase(ops_.begin() + s, ops_.begin() + e);
}
std::vector<OpDesc *> BlockDesc::AllOps() const {
std::vector<OpDesc *> res;
for (const auto &op : ops_) {
res.push_back(op.get());
}
return res;
}
void BlockDescBind::Flush() {
void BlockDesc::Flush() {
for (auto &op_desc : ops_) {
op_desc->Flush();
}
......@@ -121,43 +135,43 @@ void BlockDescBind::Flush() {
}
}
BlockDescBind *BlockDescBind::ParentBlock() const {
BlockDesc *BlockDesc::ParentBlock() const {
if (this->desc_->parent_idx() == kNoneBlockIndex) {
return nullptr;
}
return prog_->MutableBlock(static_cast<size_t>(this->desc_->parent_idx()));
}
proto::BlockDesc *BlockDescBind::Proto() {
proto::BlockDesc *BlockDesc::Proto() {
Flush();
return desc_;
}
BlockDescBind::BlockDescBind(ProgramDescBind *prog, proto::BlockDesc *desc)
BlockDesc::BlockDesc(ProgramDesc *prog, proto::BlockDesc *desc)
: prog_(prog), desc_(desc), need_update_(false) {
for (const proto::VarDesc &var_desc : desc_->vars()) {
vars_[var_desc.name()].reset(new VarDescBind(var_desc));
vars_[var_desc.name()].reset(new VarDesc(var_desc));
}
for (const proto::OpDesc &op_desc : desc_->ops()) {
ops_.emplace_back(new OpDescBind(op_desc, prog));
ops_.emplace_back(new OpDesc(op_desc, prog));
}
}
BlockDescBind::BlockDescBind(const BlockDescBind &other, proto::BlockDesc *desc,
ProgramDescBind *prog)
BlockDesc::BlockDesc(const BlockDesc &other, proto::BlockDesc *desc,
ProgramDesc *prog)
: prog_(prog), desc_(desc) {
need_update_ = true;
for (auto &op : other.ops_) {
ops_.emplace_back(new OpDescBind(*op));
ops_.emplace_back(new OpDesc(*op));
}
for (auto &it : other.vars_) {
auto *var = new VarDescBind(*it.second);
auto *var = new VarDesc(*it.second);
vars_[it.first].reset(var);
}
}
void BlockDescBind::ClearPBOps() {
void BlockDesc::ClearPBOps() {
auto ops = this->desc_->mutable_ops();
while (!ops->empty()) {
// we do not own the OpDesc, so release the ownership.
......@@ -165,7 +179,7 @@ void BlockDescBind::ClearPBOps() {
}
}
void BlockDescBind::ClearPBVars() {
void BlockDesc::ClearPBVars() {
auto vars = this->desc_->mutable_vars();
while (!vars->empty()) {
// we do not own the VarDesc, so release the ownership.
......
......@@ -28,20 +28,19 @@ limitations under the License. */
namespace paddle {
namespace framework {
class ProgramDescBind;
class ProgramDesc;
// Each Protobuf Message, we provide a XXXBind class. In that class, we optimize
// read/write speed. Only when we want the protobuf message, the local changes
// will be synchronized (by `Sync` method).
class BlockDescBind {
class BlockDesc {
public:
BlockDescBind(ProgramDescBind *prog, proto::BlockDesc *desc);
BlockDesc(ProgramDesc *prog, proto::BlockDesc *desc);
BlockDescBind(const BlockDescBind &other, proto::BlockDesc *desc,
ProgramDescBind *prog);
BlockDesc(const BlockDesc &other, proto::BlockDesc *desc, ProgramDesc *prog);
~BlockDescBind() {
~BlockDesc() {
this->ClearPBVars();
this->ClearPBOps();
}
......@@ -50,15 +49,15 @@ class BlockDescBind {
int32_t Parent() const { return desc_->parent_idx(); }
VarDescBind *Var(const std::string &name_bytes);
VarDesc *Var(const std::string &name_bytes);
VarDescBind *FindVar(const std::string &name_bytes) const;
VarDesc *FindVar(const std::string &name_bytes) const;
bool HasVar(const std::string &var_name) const;
VarDescBind *FindVarRecursive(const std::string &name_bytes) const;
VarDesc *FindVarRecursive(const std::string &name_bytes) const;
VarDescBind *FindRecursiveOrCreateVar(const std::string &name_bytes);
VarDesc *FindRecursiveOrCreateVar(const std::string &name_bytes);
bool HasVarRecursive(const std::string &var_name) const;
......@@ -70,41 +69,43 @@ class BlockDescBind {
return var_names;
}
std::vector<VarDescBind *> AllVars() const;
std::vector<VarDesc *> AllVars() const;
BlockDescBind *ParentBlock() const;
BlockDesc *ParentBlock() const;
OpDescBind *AppendOp();
OpDesc *AppendOp();
void AppendAllocatedOp(std::unique_ptr<OpDescBind> &&op_desc);
void AppendAllocatedOp(std::unique_ptr<OpDesc> &&op_desc);
OpDescBind *PrependOp();
OpDesc *PrependOp();
std::vector<OpDescBind *> AllOps() const;
void RemoveOp(size_t s, size_t e);
std::vector<OpDesc *> AllOps() const;
size_t OpSize() const { return ops_.size(); }
OpDescBind *Op(int idx) { return ops_.at(idx).get(); }
OpDesc *Op(int idx) { return ops_.at(idx).get(); }
void Flush();
proto::BlockDesc *Proto();
ProgramDescBind *Program() { return this->prog_; }
ProgramDesc *Program() { return this->prog_; }
private:
void ClearPBOps();
void ClearPBVars();
private:
ProgramDescBind *prog_; // not_own
ProgramDesc *prog_; // not_own
proto::BlockDesc *desc_; // not_own
bool need_update_;
std::deque<std::unique_ptr<OpDescBind>> ops_;
std::unordered_map<std::string, std::unique_ptr<VarDescBind>> vars_;
std::deque<std::unique_ptr<OpDesc>> ops_;
std::unordered_map<std::string, std::unique_ptr<VarDesc>> vars_;
DISABLE_COPY_AND_ASSIGN(BlockDescBind);
DISABLE_COPY_AND_ASSIGN(BlockDesc);
};
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
namespace paddle {
namespace framework {
enum DataLayout {
kNHWC = 0,
kNCHW = 1,
kAnyLayout = 2,
};
inline DataLayout StringToDataLayout(const std::string& str) {
if (str == "NHWC" || str == "nhwc") {
return DataLayout::kNHWC;
} else if (str == "NCHW" || str == "nchw") {
return DataLayout::kNCHW;
} else {
PADDLE_THROW("Unknown storage order string: %s", str);
}
}
} // namespace framework
} // namespace paddle
......@@ -106,10 +106,10 @@ template <typename T>
struct OpInfoFiller<T, kGradOpDescMaker> {
void operator()(const char* op_type, OpInfo* info) const {
info->grad_op_maker_ = [](
const OpDescBind& fwd_op,
const OpDesc& fwd_op,
const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var,
const std::vector<BlockDescBind*>& grad_block) {
const std::vector<BlockDesc*>& grad_block) {
T maker(fwd_op, no_grad_set, grad_to_var, grad_block);
return maker();
};
......@@ -119,7 +119,7 @@ struct OpInfoFiller<T, kGradOpDescMaker> {
template <typename T>
struct OpInfoFiller<T, kVarTypeInference> {
void operator()(const char* op_type, OpInfo* info) const {
info->infer_var_type_ = [](const OpDescBind& fwd_op, BlockDescBind* block) {
info->infer_var_type_ = [](const OpDesc& fwd_op, BlockDesc* block) {
T inference;
inference(fwd_op, block);
};
......
......@@ -64,8 +64,8 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) {
}
}
void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id,
bool create_local_scope) {
void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
bool create_local_scope, bool create_vars) {
// TODO(tonyyang-svail):
// - only runs on the first device (i.e. no interdevice communication)
// - will change to use multiple blocks for RNN op and Cond Op
......@@ -74,6 +74,7 @@ void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id,
auto& device = device_contexts_[0];
Scope* local_scope = scope;
if (create_vars) {
if (create_local_scope) {
local_scope = &scope->NewScope();
for (auto& var : block.AllVars()) {
......@@ -100,7 +101,8 @@ void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id,
VLOG(3) << "Create variable " << var->Name() << ", which pointer is "
<< ptr;
}
}
} // if (create_local_scope)
} // if (create_vars)
for (auto& op_desc : block.AllOps()) {
auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
......
......@@ -40,6 +40,16 @@ class DeviceContextPool {
return *pool;
}
const platform::DeviceContext* Borrow(const platform::Place& place) {
auto range = device_contexts_.equal_range(place);
if (range.first == range.second) {
PADDLE_THROW(
"'Place' is not supported, Please re-compile with WITH_GPU "
"option");
}
return range.first->second;
}
std::vector<const platform::DeviceContext*> Borrow(
const std::vector<platform::Place>& places) {
PADDLE_ENFORCE_GT(places.size(), 0);
......@@ -114,7 +124,8 @@ class Executor {
* ProgramDesc
* Scope
*/
void Run(const ProgramDescBind&, Scope*, int, bool create_local_scope = true);
void Run(const ProgramDesc&, Scope*, int, bool create_local_scope = true,
bool create_vars = true);
private:
std::vector<const platform::DeviceContext*> device_contexts_;
......
......@@ -22,21 +22,27 @@
namespace paddle {
namespace framework {
/*
This functor class is responsible for creating the gradient ops for the given
operator fwd_op. After it is called (through operator()), the pairs of
(gradient variable, corresponding input variable of fwd_op) will be added to
grad_to_var. If an input variable of fwd_op is contained in no_grad_set, its
gradient varialbe will be ignored or kEmptyVarName depending on the template
argument DropEmptyIG in the derived classes.
*/
class GradOpDescMakerBase {
public:
explicit GradOpDescMakerBase(
const OpDescBind& fwd_op,
const std::unordered_set<std::string>& no_grad_set,
const OpDesc& fwd_op, const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var,
const std::vector<BlockDescBind*>& grad_block =
std::vector<BlockDescBind*>())
const std::vector<BlockDesc*>& grad_block = std::vector<BlockDesc*>())
: fwd_op_(fwd_op),
no_grad_set_(no_grad_set),
grad_to_var_(grad_to_var),
grad_block_(grad_block) {}
virtual ~GradOpDescMakerBase() = default;
virtual std::vector<std::unique_ptr<OpDescBind>> operator()() const = 0;
virtual std::vector<std::unique_ptr<OpDesc>> operator()() const = 0;
protected:
std::vector<std::string> InputGrad(const std::string& name,
......@@ -58,6 +64,16 @@ class GradOpDescMakerBase {
if (!drop_empty_grad) {
return ret_val;
}
PADDLE_ENFORCE_LE(var_names.size(), 1UL,
"BUG from operator developer:"
" for input argument with a list of variables, "
" drop_empty_grad is not allowed because it makes"
" the correspondence bewteen a variable and its gradient"
" ambiguous. Use REGISTER_OP_EX to register the op"
" or call InputGrad(?,false) in GradOpDescMaker."
" Op type %s",
fwd_op_.Type());
std::vector<std::string> dropped_ret_val;
dropped_ret_val.reserve(ret_val.size());
std::copy_if(ret_val.begin(), ret_val.end(),
......@@ -105,26 +121,26 @@ class GradOpDescMakerBase {
std::string ForwardOpType() const { return this->fwd_op_.Type(); }
private:
const OpDescBind& fwd_op_;
const OpDesc& fwd_op_;
const std::unordered_set<std::string>& no_grad_set_;
std::unordered_map<std::string, std::string>* grad_to_var_;
protected:
std::vector<BlockDescBind*> grad_block_;
std::vector<BlockDesc*> grad_block_;
};
class SingleGradOpDescMaker : public GradOpDescMakerBase {
public:
using GradOpDescMakerBase::GradOpDescMakerBase;
std::vector<std::unique_ptr<OpDescBind>> operator()() const {
std::vector<std::unique_ptr<OpDescBind>> retv;
std::vector<std::unique_ptr<OpDesc>> operator()() const {
std::vector<std::unique_ptr<OpDesc>> retv;
retv.emplace_back(this->Apply());
return retv;
}
protected:
virtual std::unique_ptr<OpDescBind> Apply() const = 0;
virtual std::unique_ptr<OpDesc> Apply() const = 0;
};
template <bool DropEmptyIG = true>
......@@ -133,8 +149,8 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker {
using SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
virtual std::unique_ptr<OpDescBind> Apply() const {
auto* grad = new OpDescBind();
virtual std::unique_ptr<OpDesc> Apply() const {
auto* grad = new OpDesc();
grad->SetType(this->GradOpType());
for (auto& input_param : this->InputNames()) {
......@@ -150,7 +166,7 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker {
grad->SetAttrMap(this->Attrs());
return std::unique_ptr<OpDescBind>(grad);
return std::unique_ptr<OpDesc>(grad);
}
virtual std::string GradOpType() const {
......@@ -161,7 +177,7 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker {
class EmptyGradOpMaker : public GradOpDescMakerBase {
public:
using GradOpDescMakerBase::GradOpDescMakerBase;
std::vector<std::unique_ptr<OpDescBind>> operator()() const override {
std::vector<std::unique_ptr<OpDesc>> operator()() const override {
return {};
}
};
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
namespace paddle {
namespace framework {
// For more details about the design of LibraryType, Please refer to
// https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/operator_kernel_type.md#library
enum LibraryType { kPlain = 0; kMKLDNN = 1; kCUDNN = 2; }
} // namespace
} // framework
......@@ -25,12 +25,11 @@ limitations under the License. */
namespace paddle {
namespace framework {
class OpDescBind;
class BlockDescBind;
class OpDesc;
class BlockDesc;
class CompileTimeInferShapeContext : public InferShapeContext {
public:
CompileTimeInferShapeContext(const OpDescBind &op,
const BlockDescBind &block);
CompileTimeInferShapeContext(const OpDesc &op, const BlockDesc &block);
bool HasInput(const std::string &name) const override;
......@@ -76,13 +75,12 @@ class CompileTimeInferShapeContext : public InferShapeContext {
void SetDim(const std::string &name, const DDim &dim) override;
const OpDescBind &op_;
const BlockDescBind &block_;
const OpDesc &op_;
const BlockDesc &block_;
};
OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const AttributeMap &attrs) {
OpDesc::OpDesc(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs) {
desc_.set_type(type);
inputs_ = inputs;
outputs_ = outputs;
......@@ -90,7 +88,7 @@ OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs,
need_update_ = true;
}
OpDescBind::OpDescBind(const proto::OpDesc &desc, ProgramDescBind *prog)
OpDesc::OpDesc(const proto::OpDesc &desc, ProgramDesc *prog)
: desc_(desc), need_update_(false) {
// restore inputs_
int input_size = desc_.inputs_size();
......@@ -126,20 +124,19 @@ OpDescBind::OpDescBind(const proto::OpDesc &desc, ProgramDescBind *prog)
}
}
proto::OpDesc *OpDescBind::Proto() {
proto::OpDesc *OpDesc::Proto() {
Flush();
return &desc_;
}
const std::vector<std::string> &OpDescBind::Input(
const std::string &name) const {
const std::vector<std::string> &OpDesc::Input(const std::string &name) const {
auto it = inputs_.find(name);
PADDLE_ENFORCE(it != inputs_.end(), "Input %s cannot be found in Op %s", name,
Type());
return it->second;
}
std::vector<std::string> OpDescBind::InputArgumentNames() const {
std::vector<std::string> OpDesc::InputArgumentNames() const {
std::vector<std::string> retv;
for (auto &ipt : this->inputs_) {
retv.insert(retv.end(), ipt.second.begin(), ipt.second.end());
......@@ -147,21 +144,20 @@ std::vector<std::string> OpDescBind::InputArgumentNames() const {
return retv;
}
void OpDescBind::SetInput(const std::string &param_name,
void OpDesc::SetInput(const std::string &param_name,
const std::vector<std::string> &args) {
need_update_ = true;
inputs_[param_name] = args;
}
const std::vector<std::string> &OpDescBind::Output(
const std::string &name) const {
const std::vector<std::string> &OpDesc::Output(const std::string &name) const {
auto it = outputs_.find(name);
PADDLE_ENFORCE(it != outputs_.end(), "Output %s cannot be found in Op %s",
name, Type());
return it->second;
}
std::vector<std::string> OpDescBind::OutputArgumentNames() const {
std::vector<std::string> OpDesc::OutputArgumentNames() const {
std::vector<std::string> retv;
for (auto &ipt : this->outputs_) {
retv.insert(retv.end(), ipt.second.begin(), ipt.second.end());
......@@ -169,19 +165,19 @@ std::vector<std::string> OpDescBind::OutputArgumentNames() const {
return retv;
}
void OpDescBind::SetOutput(const std::string &param_name,
void OpDesc::SetOutput(const std::string &param_name,
const std::vector<std::string> &args) {
need_update_ = true;
this->outputs_[param_name] = args;
}
proto::AttrType OpDescBind::GetAttrType(const std::string &name) const {
proto::AttrType OpDesc::GetAttrType(const std::string &name) const {
auto it = attrs_.find(name);
PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name);
return static_cast<proto::AttrType>(it->second.which() - 1);
}
std::vector<std::string> OpDescBind::AttrNames() const {
std::vector<std::string> OpDesc::AttrNames() const {
std::vector<std::string> retv;
retv.reserve(attrs_.size());
for (auto &attr : attrs_) {
......@@ -190,41 +186,39 @@ std::vector<std::string> OpDescBind::AttrNames() const {
return retv;
}
void OpDescBind::SetAttr(const std::string &name, const Attribute &v) {
void OpDesc::SetAttr(const std::string &name, const Attribute &v) {
this->attrs_[name] = v;
need_update_ = true;
}
void OpDescBind::SetBlockAttr(const std::string &name, BlockDescBind &block) {
void OpDesc::SetBlockAttr(const std::string &name, BlockDesc &block) {
this->attrs_[name] = &block;
need_update_ = true;
}
void OpDescBind::SetAttrMap(
void OpDesc::SetAttrMap(
const std::unordered_map<std::string, Attribute> &attr_map) {
attrs_ = attr_map;
need_update_ = true;
}
Attribute OpDescBind::GetAttr(const std::string &name) const {
Attribute OpDesc::GetAttr(const std::string &name) const {
auto it = attrs_.find(name);
PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name);
return it->second;
}
int OpDescBind::GetBlockAttr(const std::string &name) const {
int OpDesc::GetBlockAttr(const std::string &name) const {
auto it = attrs_.find(name);
PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name);
return boost::get<BlockDescBind *>(it->second)->ID();
return boost::get<BlockDesc *>(it->second)->ID();
}
const std::unordered_map<std::string, Attribute> &OpDescBind::GetAttrMap()
const {
const std::unordered_map<std::string, Attribute> &OpDesc::GetAttrMap() const {
return attrs_;
}
void OpDescBind::Rename(const std::string &old_name,
const std::string &new_name) {
void OpDesc::Rename(const std::string &old_name, const std::string &new_name) {
for (auto &input : inputs_) {
std::replace(input.second.begin(), input.second.end(), old_name, new_name);
}
......@@ -235,7 +229,7 @@ void OpDescBind::Rename(const std::string &old_name,
need_update_ = true;
}
void OpDescBind::RenameOutput(const std::string &old_name,
void OpDesc::RenameOutput(const std::string &old_name,
const std::string &new_name) {
for (auto &output : outputs_) {
std::replace(output.second.begin(), output.second.end(), old_name,
......@@ -244,7 +238,7 @@ void OpDescBind::RenameOutput(const std::string &old_name,
need_update_ = true;
}
void OpDescBind::RenameInput(const std::string &old_name,
void OpDesc::RenameInput(const std::string &old_name,
const std::string &new_name) {
for (auto &input : inputs_) {
std::replace(input.second.begin(), input.second.end(), old_name, new_name);
......@@ -278,7 +272,7 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> {
void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); }
};
void OpDescBind::Flush() {
void OpDesc::Flush() {
if (need_update_) {
this->desc_.mutable_inputs()->Clear();
for (auto &ipt : inputs_) {
......@@ -330,7 +324,7 @@ static void InitInferShapeFuncs() {
});
}
void OpDescBind::CheckAttrs() {
void OpDesc::CheckAttrs() {
PADDLE_ENFORCE(!Type().empty(),
"CheckAttr() can not be called before type is setted.");
auto *checker = OpInfoMap::Instance().Get(Type()).Checker();
......@@ -342,7 +336,7 @@ void OpDescBind::CheckAttrs() {
checker->Check(attrs_);
}
void OpDescBind::InferShape(const BlockDescBind &block) const {
void OpDesc::InferShape(const BlockDesc &block) const {
VLOG(3) << "CompileTime infer shape on " << Type();
InitInferShapeFuncs();
auto &infer_shape = OpInfoMap::Instance().Get(this->Type()).infer_shape_;
......@@ -365,7 +359,7 @@ void OpDescBind::InferShape(const BlockDescBind &block) const {
infer_shape(&ctx);
}
void OpDescBind::InferVarType(BlockDescBind *block) const {
void OpDesc::InferVarType(BlockDesc *block) const {
auto &info = OpInfoMap::Instance().Get(this->Type());
if (info.infer_var_type_) {
info.infer_var_type_(*this, block);
......@@ -384,7 +378,7 @@ void OpDescBind::InferVarType(BlockDescBind *block) const {
}
CompileTimeInferShapeContext::CompileTimeInferShapeContext(
const OpDescBind &op, const BlockDescBind &block)
const OpDesc &op, const BlockDesc &block)
: op_(op), block_(block) {}
bool CompileTimeInferShapeContext::HasInput(const std::string &name) const {
......
......@@ -23,17 +23,17 @@ limitations under the License. */
namespace paddle {
namespace framework {
class BlockDescBind;
class ProgramDescBind;
class BlockDesc;
class ProgramDesc;
class OpDescBind {
class OpDesc {
public:
OpDescBind() {}
OpDesc() {}
OpDescBind(const std::string &type, const VariableNameMap &inputs,
OpDesc(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs);
OpDescBind(const proto::OpDesc &desc, ProgramDescBind *prog);
OpDesc(const proto::OpDesc &desc, ProgramDesc *prog);
proto::OpDesc *Proto();
......@@ -65,7 +65,7 @@ class OpDescBind {
void SetAttr(const std::string &name, const Attribute &v);
void SetBlockAttr(const std::string &name, BlockDescBind &block);
void SetBlockAttr(const std::string &name, BlockDesc &block);
Attribute GetAttr(const std::string &name) const;
......@@ -107,9 +107,9 @@ class OpDescBind {
void CheckAttrs();
void InferShape(const BlockDescBind &block) const;
void InferShape(const BlockDesc &block) const;
void InferVarType(BlockDescBind *block) const;
void InferVarType(BlockDesc *block) const;
void MarkAsTarget() { desc_.set_is_target(true); }
......@@ -127,7 +127,9 @@ class OpDescBind {
}
proto::OpDesc desc_;
// input arg name => output variable names
VariableNameMap inputs_;
// output arg name => output variable names
VariableNameMap outputs_;
AttributeMap attrs_;
......
......@@ -47,7 +47,7 @@ static VariableNameMap ConvertOpDescVarsToVarNameMap(
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
const proto::OpDesc& op_desc) {
VLOG(1) << "CreateOp directly from OpDesc is deprecated. It should only be"
"used in unit tests. Use CreateOp(const OpDescBind& op_desc) "
"used in unit tests. Use CreateOp(const OpDesc& op_desc) "
"instead.";
VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
VariableNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs());
......@@ -59,7 +59,7 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
return CreateOp(op_desc.type(), inputs, outputs, attrs);
}
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDescBind& op_desc) {
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDesc& op_desc) {
return CreateOp(op_desc.Type(), op_desc.Inputs(), op_desc.Outputs(),
op_desc.GetAttrMap());
}
......
......@@ -79,7 +79,7 @@ class OpRegistry {
static std::unique_ptr<OperatorBase> CreateOp(const proto::OpDesc& op_desc);
static std::unique_ptr<OperatorBase> CreateOp(const OpDescBind& op_desc);
static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc);
};
template <typename PlaceType, bool at_end, size_t I, typename... KernelType>
......@@ -126,6 +126,14 @@ class OpKernelRegistrar : public Registrar {
__test_global_namespace_##uniq_name##__>::value, \
msg)
/*
The variadic arguments should be class types derived from one of the
following classes:
OpProtoAndCheckerMaker
GradOpDescMakerBase
VarTypeInference
InferShapeBase
*/
#define REGISTER_OPERATOR(op_type, op_class, ...) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_op__##op_type, \
......@@ -144,15 +152,24 @@ class OpKernelRegistrar : public Registrar {
}
/**
* Macro to register Operator.
* Macro to register Operator. When the input is duplicable, you should
* use REGISTER_OP_EX with deop_empty_grad=false instead.
*/
#define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \
grad_op_class) \
REGISTER_OP_EX(op_type, op_class, op_maker_class, grad_op_type, \
grad_op_class, true)
// When an argument is duplicable, we need to use this version.
// Perhaps we can omit DropEmptyIG template parameter and
// only have one version of REGISTER_OP.
#define REGISTER_OP_EX(op_type, op_class, op_maker_class, grad_op_type, \
grad_op_class, drop_empty_grad) \
REGISTER_OPERATOR(grad_op_type, grad_op_class); \
class _GradOpDescMaker_##grad_op_type##_ \
: public ::paddle::framework::DefaultGradOpDescMaker<true> { \
: public ::paddle::framework::DefaultGradOpDescMaker<drop_empty_grad> { \
using ::paddle::framework::DefaultGradOpDescMaker< \
true>::DefaultGradOpDescMaker; \
drop_empty_grad>::DefaultGradOpDescMaker; \
\
protected: \
virtual std::string GradOpType() const { return #grad_op_type; } \
......
......@@ -18,49 +18,49 @@ limitations under the License. */
namespace paddle {
namespace framework {
BlockDescBind *ProgramDescBind::AppendBlock(const BlockDescBind &parent) {
BlockDesc *ProgramDesc::AppendBlock(const BlockDesc &parent) {
auto *b = desc_.add_blocks();
b->set_parent_idx(parent.ID());
b->set_idx(desc_.blocks_size() - 1);
blocks_.emplace_back(new BlockDescBind(this, b));
blocks_.emplace_back(new BlockDesc(this, b));
return blocks_.back().get();
}
proto::ProgramDesc *ProgramDescBind::Proto() {
proto::ProgramDesc *ProgramDesc::Proto() {
for (auto &block : blocks_) {
block->Flush();
}
return &desc_;
}
ProgramDescBind::ProgramDescBind() {
ProgramDesc::ProgramDesc() {
auto *block = desc_.mutable_blocks()->Add();
block->set_idx(kRootBlockIndex);
block->set_parent_idx(kNoneBlockIndex);
blocks_.emplace_back(new BlockDescBind(this, block));
blocks_.emplace_back(new BlockDesc(this, block));
}
ProgramDescBind::ProgramDescBind(const ProgramDescBind &o) {
ProgramDesc::ProgramDesc(const ProgramDesc &o) {
desc_ = o.desc_;
for (int i = 0; i < desc_.blocks_size(); ++i) {
auto *block = desc_.mutable_blocks(i);
blocks_.emplace_back(new BlockDescBind(*o.blocks_[i], block, this));
blocks_.emplace_back(new BlockDesc(*o.blocks_[i], block, this));
}
}
ProgramDescBind::ProgramDescBind(const proto::ProgramDesc &desc) {
ProgramDesc::ProgramDesc(const proto::ProgramDesc &desc) {
desc_ = desc;
for (auto &block_desc : *desc_.mutable_blocks()) {
blocks_.emplace_back(new BlockDescBind(this, &block_desc));
blocks_.emplace_back(new BlockDesc(this, &block_desc));
}
}
ProgramDescBind::ProgramDescBind(const std::string &binary_str) {
ProgramDesc::ProgramDesc(const std::string &binary_str) {
PADDLE_ENFORCE(desc_.ParseFromString(binary_str),
"Fail to parse program_desc from binary string.");
for (auto &block_desc : *desc_.mutable_blocks()) {
blocks_.emplace_back(new BlockDescBind(this, &block_desc));
blocks_.emplace_back(new BlockDesc(this, &block_desc));
}
}
......
......@@ -23,23 +23,23 @@ limitations under the License. */
namespace paddle {
namespace framework {
class BlockDescBind;
class BlockDesc;
class ProgramDescBind {
class ProgramDesc {
public:
ProgramDescBind();
ProgramDesc();
explicit ProgramDescBind(const proto::ProgramDesc &desc);
explicit ProgramDesc(const proto::ProgramDesc &desc);
ProgramDescBind(const ProgramDescBind &o);
ProgramDesc(const ProgramDesc &o);
explicit ProgramDescBind(const std::string &binary_str);
explicit ProgramDesc(const std::string &binary_str);
BlockDescBind *AppendBlock(const BlockDescBind &parent);
BlockDesc *AppendBlock(const BlockDesc &parent);
BlockDescBind *MutableBlock(size_t idx) { return blocks_[idx].get(); }
BlockDesc *MutableBlock(size_t idx) { return blocks_[idx].get(); }
const BlockDescBind &Block(size_t idx) const { return *blocks_[idx]; }
const BlockDesc &Block(size_t idx) const { return *blocks_[idx]; }
size_t Size() const { return blocks_.size(); }
......@@ -48,7 +48,7 @@ class ProgramDescBind {
private:
proto::ProgramDesc desc_;
std::vector<std::unique_ptr<BlockDescBind>> blocks_;
std::vector<std::unique_ptr<BlockDesc>> blocks_;
};
} // namespace framework
} // namespace paddle
......@@ -19,7 +19,7 @@
namespace paddle {
namespace framework {
TEST(ProgramDesc, copy_ctor) {
ProgramDescBind program;
ProgramDesc program;
auto* global_block = program.MutableBlock(0);
auto* x = global_block->Var("X");
x->SetType(proto::VarDesc_VarType_LOD_TENSOR);
......@@ -42,12 +42,12 @@ TEST(ProgramDesc, copy_ctor) {
out->SetType(proto::VarDesc_VarType_LOD_TENSOR);
op->SetOutput("Y", {out->Name()});
ProgramDescBind program_copy(program);
ProgramDesc program_copy(program);
auto* global_block_copy = program_copy.MutableBlock(0);
ASSERT_NE(global_block, global_block_copy);
auto assert_same_var = [&](const std::string& name, VarDescBind* var_before) {
auto assert_same_var = [&](const std::string& name, VarDesc* var_before) {
ASSERT_TRUE(global_block_copy->HasVar(name));
auto* copy = global_block_copy->Var(name);
ASSERT_NE(copy, var_before);
......@@ -81,7 +81,7 @@ TEST(ProgramDesc, copy_ctor) {
}
TEST(ProgramDescBind, serialize_and_deserialize) {
ProgramDescBind program_origin;
ProgramDesc program_origin;
auto* global_block = program_origin.MutableBlock(0);
auto* x = global_block->Var("X");
x->SetType(proto::VarDesc_VarType_LOD_TENSOR);
......@@ -107,11 +107,11 @@ TEST(ProgramDescBind, serialize_and_deserialize) {
std::string binary_str;
program_origin.Proto()->SerializeToString(&binary_str);
ProgramDescBind program_restored(binary_str);
ProgramDesc program_restored(binary_str);
auto* global_block_restored = program_restored.MutableBlock(0);
ASSERT_NE(global_block, global_block_restored);
auto assert_same_var = [&](const std::string& name, VarDescBind* var_before) {
auto assert_same_var = [&](const std::string& name, VarDesc* var_before) {
ASSERT_TRUE(global_block_restored->HasVar(name));
auto* restored = global_block_restored->Var(name);
ASSERT_NE(restored, var_before);
......
......@@ -29,7 +29,7 @@ namespace ops = paddle::operators;
void AddOp(const std::string &type, const f::VariableNameMap &inputs,
const f::VariableNameMap &outputs, f::AttributeMap attrs,
paddle::framework::BlockDescBind *block) {
paddle::framework::BlockDesc *block) {
// insert output
for (auto kv : outputs) {
for (auto v : kv.second) {
......@@ -51,8 +51,8 @@ void AddOp(const std::string &type, const f::VariableNameMap &inputs,
}
TEST(Prune, one_operator) {
f::ProgramDescBind program;
f::BlockDescBind *block = program.MutableBlock(0);
f::ProgramDesc program;
f::BlockDesc *block = program.MutableBlock(0);
AddOp("one_one", {{"input", {"a"}}}, {{"output", {"b"}}}, f::AttributeMap{},
block);
......@@ -69,8 +69,8 @@ TEST(Prune, one_operator) {
}
TEST(Prune, forward) {
f::ProgramDescBind program;
f::BlockDescBind *block = program.MutableBlock(0);
f::ProgramDesc program;
f::BlockDesc *block = program.MutableBlock(0);
AddOp("one_one", {{"input", {"a"}}}, {{"output", {"b"}}}, f::AttributeMap{},
block);
......@@ -92,8 +92,8 @@ TEST(Prune, forward) {
}
TEST(Prune, multi_input_op) {
f::ProgramDescBind program;
f::BlockDescBind *block = program.MutableBlock(0);
f::ProgramDesc program;
f::BlockDesc *block = program.MutableBlock(0);
AddOp("one_one", {{"input", {"a0"}}}, {{"output", {"b0"}}}, f::AttributeMap{},
block);
......@@ -113,8 +113,8 @@ TEST(Prune, multi_input_op) {
}
TEST(Prune, multi_output_op) {
f::ProgramDescBind program;
f::BlockDescBind *block = program.MutableBlock(0);
f::ProgramDesc program;
f::BlockDesc *block = program.MutableBlock(0);
AddOp("one_two", {{"input", {"a"}}}, {{"output", {"b", "c"}}},
f::AttributeMap{}, block);
......@@ -132,8 +132,8 @@ TEST(Prune, multi_output_op) {
}
TEST(Prune, multi_target) {
f::ProgramDescBind program;
f::BlockDescBind *block = program.MutableBlock(0);
f::ProgramDesc program;
f::BlockDesc *block = program.MutableBlock(0);
AddOp("one_two", {{"input", {"a"}}}, {{"output", {"b", "c"}}},
f::AttributeMap{}, block);
......
......@@ -25,11 +25,9 @@
namespace paddle {
namespace framework {
class OperatorBase;
class OpDescBind;
class BlockDescBind;
class BlockDesc;
class OpDesc;
class InferShapeContext;
class BlockDescBind;
class BlockDesc;
using VariableNameMap = std::map<std::string, std::vector<std::string>>;
......@@ -37,7 +35,7 @@ using VariableNameMap = std::map<std::string, std::vector<std::string>>;
using Attribute =
boost::variant<boost::blank, int, float, std::string, std::vector<int>,
std::vector<float>, std::vector<std::string>, bool,
std::vector<bool>, BlockDescBind*>;
std::vector<bool>, BlockDesc*>;
using AttributeMap = std::unordered_map<std::string, Attribute>;
......@@ -45,13 +43,13 @@ using OpCreator = std::function<OperatorBase*(
const std::string& /*type*/, const VariableNameMap& /*inputs*/,
const VariableNameMap& /*outputs*/, const AttributeMap& /*attrs*/)>;
using GradOpMakerFN = std::function<std::vector<std::unique_ptr<OpDescBind>>(
const OpDescBind&, const std::unordered_set<std::string>& /*no_grad_set*/,
using GradOpMakerFN = std::function<std::vector<std::unique_ptr<OpDesc>>(
const OpDesc&, const std::unordered_set<std::string>& /*no_grad_set*/,
std::unordered_map<std::string, std::string>* /*grad_to_var*/,
const std::vector<BlockDescBind*>& grad_block)>;
const std::vector<BlockDesc*>& grad_block)>;
using InferVarTypeFN = std::function<void(const OpDescBind& /*op_desc*/,
BlockDescBind* /*block*/)>;
using InferVarTypeFN =
std::function<void(const OpDesc& /*op_desc*/, BlockDesc* /*block*/)>;
using InferShapeFN = std::function<void(InferShapeContext*)>;
......
......@@ -18,29 +18,27 @@ limitations under the License. */
namespace paddle {
namespace framework {
proto::VarDesc::VarType VarDescBind::GetType() const { return desc_.type(); }
proto::VarDesc::VarType VarDesc::GetType() const { return desc_.type(); }
void VarDescBind::SetType(proto::VarDesc::VarType type) {
desc_.set_type(type);
}
void VarDesc::SetType(proto::VarDesc::VarType type) { desc_.set_type(type); }
void VarDescBind::SetShape(const std::vector<int64_t> &dims) {
void VarDesc::SetShape(const std::vector<int64_t> &dims) {
VectorToRepeated(dims, mutable_tensor_desc()->mutable_dims());
}
void VarDescBind::SetDataType(proto::DataType data_type) {
void VarDesc::SetDataType(proto::DataType data_type) {
mutable_tensor_desc()->set_data_type(data_type);
}
std::vector<int64_t> VarDescBind::Shape() const {
std::vector<int64_t> VarDesc::Shape() const {
return RepeatedToVector(tensor_desc().dims());
}
proto::DataType VarDescBind::GetDataType() const {
proto::DataType VarDesc::GetDataType() const {
return tensor_desc().data_type();
}
void VarDescBind::SetLoDLevel(int32_t lod_level) {
void VarDesc::SetLoDLevel(int32_t lod_level) {
switch (desc_.type()) {
case proto::VarDesc::LOD_TENSOR:
desc_.mutable_lod_tensor()->set_lod_level(lod_level);
......@@ -54,7 +52,7 @@ void VarDescBind::SetLoDLevel(int32_t lod_level) {
}
}
int32_t VarDescBind::GetLodLevel() const {
int32_t VarDesc::GetLodLevel() const {
switch (desc_.type()) {
case proto::VarDesc::LOD_TENSOR:
return desc_.lod_tensor().lod_level();
......@@ -66,7 +64,7 @@ int32_t VarDescBind::GetLodLevel() const {
}
}
const proto::TensorDesc &VarDescBind::tensor_desc() const {
const proto::TensorDesc &VarDesc::tensor_desc() const {
PADDLE_ENFORCE(desc_.has_type(), "invoke TensorDesc must after set type");
switch (desc_.type()) {
case proto::VarDesc::SELECTED_ROWS:
......@@ -80,7 +78,7 @@ const proto::TensorDesc &VarDescBind::tensor_desc() const {
}
}
proto::TensorDesc *VarDescBind::mutable_tensor_desc() {
proto::TensorDesc *VarDesc::mutable_tensor_desc() {
PADDLE_ENFORCE(desc_.has_type(),
"invoke MutableTensorDesc must after set type");
switch (desc_.type()) {
......
......@@ -53,14 +53,14 @@ inline void VectorToRepeated(const std::vector<bool> &vec,
}
}
class VarDescBind {
class VarDesc {
public:
explicit VarDescBind(const std::string &name) {
explicit VarDesc(const std::string &name) {
desc_.set_name(name);
desc_.set_type(proto::VarDesc::LOD_TENSOR);
}
explicit VarDescBind(const proto::VarDesc &desc) : desc_(desc) {}
explicit VarDesc(const proto::VarDesc &desc) : desc_(desc) {}
proto::VarDesc *Proto() { return &desc_; }
......
......@@ -21,8 +21,7 @@ namespace framework {
class VarTypeInference {
public:
virtual ~VarTypeInference() {}
virtual void operator()(const OpDescBind& op_desc,
BlockDescBind* block) const = 0;
virtual void operator()(const OpDesc& op_desc, BlockDesc* block) const = 0;
};
} // namespace framework
......
......@@ -33,8 +33,7 @@ class SumOpMaker : public OpProtoAndCheckerMaker {
class SumOpVarTypeInference : public VarTypeInference {
public:
void operator()(const OpDescBind &op_desc,
BlockDescBind *block) const override {
void operator()(const OpDesc &op_desc, BlockDesc *block) const override {
auto &inputs = op_desc.Input("X");
auto default_var_type = proto::VarDesc::SELECTED_ROWS;
......@@ -62,7 +61,7 @@ namespace paddle {
namespace framework {
TEST(InferVarType, sum_op) {
ProgramDescBind prog;
ProgramDesc prog;
auto *op = prog.MutableBlock(0)->AppendOp();
op->SetType("sum");
op->SetInput("X", {"test_a", "test_b", "test_c"});
......@@ -85,7 +84,7 @@ TEST(InferVarType, sum_op) {
}
TEST(InferVarType, sum_op_without_infer_var_type) {
ProgramDescBind prog;
ProgramDesc prog;
auto *op = prog.MutableBlock(0)->AppendOp();
op->SetType("sum_without_infer_var_type");
op->SetInput("X", {"test2_a", "test2_b", "test2_c"});
......
......@@ -62,33 +62,6 @@ void Copy<platform::GPUPlace, platform::GPUPlace>(platform::GPUPlace dst_place,
}
}
template <>
void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace dst_place,
void* dst,
platform::GPUPlace src_place,
const void* src, size_t num) {
platform::SetDeviceId(src_place.device);
platform::GpuMemcpySync(dst, src, num, cudaMemcpyDeviceToHost);
}
template <>
void Copy<platform::GPUPlace, platform::CPUPlace>(platform::GPUPlace dst_place,
void* dst,
platform::CPUPlace src_place,
const void* src, size_t num) {
platform::SetDeviceId(dst_place.device);
platform::GpuMemcpySync(dst, src, num, cudaMemcpyHostToDevice);
}
template <>
void Copy<platform::GPUPlace, platform::GPUPlace>(platform::GPUPlace dst_place,
void* dst,
platform::GPUPlace src_place,
const void* src, size_t num) {
platform::SetDeviceId(dst_place.device);
platform::GpuMemcpySync(dst, src, num, cudaMemcpyDeviceToDevice);
}
#endif
} // namespace memory
......
......@@ -26,7 +26,7 @@ template <int BlockSize>
__global__ void AccuracyCudaKernel(const int N, const int D,
const int64_t* Xdata,
const int64_t* labeldata, int* correct_data,
float* accuracy) {
float* accuracy, int* total_data) {
int count = 0;
__shared__ int total[BlockSize];
......@@ -47,6 +47,7 @@ __global__ void AccuracyCudaKernel(const int N, const int D,
if (threadIdx.x == 0) {
*correct_data = result;
*accuracy = static_cast<float>(result) / static_cast<float>(N);
*total_data = N;
}
}
......@@ -80,22 +81,11 @@ class AccuracyOpCUDAKernel : public framework::OpKernel<T> {
if (num_samples == 0) {
return;
}
platform::GpuMemcpyAsync(total_data, &num_samples, sizeof(int),
cudaMemcpyHostToDevice, stream);
AccuracyCudaKernel<
PADDLE_CUDA_NUM_THREADS><<<1, PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
num_samples, infer_width, indices_data, label_data, correct_data,
accuracy_data);
int d_num_samples, d_num_correct;
float d_accuracy;
platform::GpuMemcpyAsync(&d_num_correct, correct_data, sizeof(int),
cudaMemcpyDeviceToHost, stream);
platform::GpuMemcpyAsync(&d_num_samples, total_data, sizeof(int),
cudaMemcpyDeviceToHost, stream);
platform::GpuMemcpyAsync(&d_accuracy, accuracy_data, sizeof(float),
cudaMemcpyDeviceToHost, stream);
accuracy_data, total_data);
}
};
......
......@@ -149,14 +149,14 @@ class ArrayToLoDTensorGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto *grad_op = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDesc();
grad_op->SetType("lod_tensor_to_array");
grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetInput("RankTable", Input("RankTable"));
grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(grad_op);
return std::unique_ptr<framework::OpDesc>(grad_op);
}
};
......
......@@ -121,12 +121,12 @@ class AssignGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto *op = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto *op = new framework::OpDesc();
op->SetType("assign");
op->SetInput("X", OutputGrad("Out"));
op->SetOutput("Out", InputGrad("X"));
return std::unique_ptr<framework::OpDescBind>(op);
return std::unique_ptr<framework::OpDesc>(op);
}
};
......
......@@ -13,12 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/batch_norm_op.h"
#include "paddle/framework/data_layout.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using DataLayout = framework::DataLayout;
template <typename T>
using EigenArrayMap =
......@@ -60,14 +62,14 @@ class BatchNormOp : public framework::OperatorWithKernel {
"Variance and VarianceOut should share the same memory");
const auto x_dims = ctx->GetInputDim("X");
const TensorFormat tensor_format =
StringToTensorFormat(ctx->Attrs().Get<std::string>("tensor_format"));
const DataLayout data_layout = framework::StringToDataLayout(
ctx->Attrs().Get<std::string>("data_layout"));
PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5,
"Input X must have 2 to 5 dimensions.");
const int C =
(tensor_format == TensorFormat::NCHW ? x_dims[1]
(data_layout == DataLayout::kNCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL);
......@@ -90,7 +92,7 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<bool>("is_test", "").SetDefault(false);
AddAttr<float>("momentum", "").SetDefault(0.9);
AddAttr<float>("epsilon", "").SetDefault(1e-5);
AddAttr<std::string>("tensor_format", "").SetDefault("NCHW");
AddAttr<std::string>("data_layout", "").SetDefault("NCHW");
AddInput("X", "The input tensor");
AddInput("Scale",
"Scale is a 1-dimensional tensor of size C "
......@@ -141,9 +143,9 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
const float epsilon = ctx.Attr<float>("epsilon");
const float momentum = ctx.Attr<float>("momentum");
const bool is_test = ctx.Attr<bool>("is_test");
const std::string tensor_format_str =
ctx.Attr<std::string>("tensor_format");
const TensorFormat tensor_format = StringToTensorFormat(tensor_format_str);
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
const DataLayout data_layout =
framework::StringToDataLayout(data_layout_str);
const auto *x = ctx.Input<Tensor>("X");
const auto &x_dims = x->dims();
......@@ -151,7 +153,7 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
"The Input dim size should be between 2 and 5");
const int N = x_dims[0];
const int C =
(tensor_format == TensorFormat::NCHW ? x_dims[1]
(data_layout == DataLayout::kNCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);
const int sample_size = x->numel() / N / C;
......@@ -177,8 +179,8 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
saved_mean_e.setZero();
saved_variance_e.setZero();
switch (tensor_format) {
case TensorFormat::NCHW: {
switch (data_layout) {
case DataLayout::kNCHW: {
ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
for (int nc = 0; nc < N * C; ++nc) {
saved_mean_e(nc % C) += x_arr.col(nc).sum();
......@@ -191,7 +193,7 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
saved_variance_e /= N * sample_size;
break;
}
case TensorFormat::NHWC: {
case DataLayout::kNHWC: {
ConstEigenArrayMap<T> x_arr(x->data<T>(), C, N * sample_size);
for (int i = 0; i < N * sample_size; ++i) {
saved_mean_e += x_arr.col(i);
......@@ -205,7 +207,7 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
break;
}
default:
PADDLE_THROW("Unknown storage order: %s", tensor_format_str);
PADDLE_THROW("Unknown storage order: %s", data_layout_str);
}
EigenVectorArrayMap<T> running_mean_arr(
......@@ -247,8 +249,8 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
Eigen::Array<T, Eigen::Dynamic, 1> new_bias =
bias_arr - mean_arr * inv_std * scale_arr;
switch (tensor_format) {
case TensorFormat::NCHW: {
switch (data_layout) {
case DataLayout::kNCHW: {
EigenArrayMap<T> y_arr(y->mutable_data<T>(ctx.GetPlace()), sample_size,
N * C);
ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
......@@ -257,7 +259,7 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
}
break;
}
case TensorFormat::NHWC: {
case DataLayout::kNHWC: {
EigenArrayMap<T>(y->mutable_data<T>(ctx.GetPlace()), C,
N * sample_size) =
(ConstEigenArrayMap<T>(x->data<T>(), C, N * sample_size).colwise() *
......@@ -267,7 +269,7 @@ class BatchNormKernel<platform::CPUDeviceContext, T>
break;
}
default:
PADDLE_THROW("Unknown storage order: %d", tensor_format);
PADDLE_THROW("Unknown storage order: %d", data_layout);
}
}
};
......@@ -290,10 +292,10 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("Bias")), "");
const auto x_dims = ctx->GetInputDim("X");
const TensorFormat tensor_format =
StringToTensorFormat(ctx->Attrs().Get<std::string>("tensor_format"));
const DataLayout data_layout = framework::StringToDataLayout(
ctx->Attrs().Get<std::string>("data_layout"));
const int C =
(tensor_format == TensorFormat::NCHW ? x_dims[1]
(data_layout == DataLayout::kNCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);
ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
......@@ -333,9 +335,9 @@ class BatchNormGradKernel<platform::CPUDeviceContext, T>
const auto *saved_mean = ctx.Input<Tensor>("SavedMean");
// SavedVariance have been reverted in forward operator
const auto *saved_inv_variance = ctx.Input<Tensor>("SavedVariance");
const std::string tensor_format_str =
ctx.Attr<std::string>("tensor_format");
const TensorFormat tensor_format = StringToTensorFormat(tensor_format_str);
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
const DataLayout data_layout =
framework::StringToDataLayout(data_layout_str);
// Get the size for each dimension.
// NCHW [batch_size, in_channels, in_height, in_width]
......@@ -344,7 +346,7 @@ class BatchNormGradKernel<platform::CPUDeviceContext, T>
"The Input dim size should be between 2 and 5");
const int N = x_dims[0];
const int C =
(tensor_format == TensorFormat::NCHW ? x_dims[1]
(data_layout == DataLayout::kNCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]);
const int sample_size = x->numel() / N / C;
......@@ -376,8 +378,8 @@ class BatchNormGradKernel<platform::CPUDeviceContext, T>
const auto scale_inv_var_nhw = scale_arr * inv_var_arr / (N * sample_size);
switch (tensor_format) {
case TensorFormat::NCHW: {
switch (data_layout) {
case DataLayout::kNCHW: {
ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
ConstEigenArrayMap<T> d_y_arr(d_y->data<T>(), sample_size, N * C);
EigenArrayMap<T> d_x_arr(d_x->mutable_data<T>(ctx.GetPlace()),
......@@ -400,7 +402,7 @@ class BatchNormGradKernel<platform::CPUDeviceContext, T>
}
break;
}
case TensorFormat::NHWC: {
case DataLayout::kNHWC: {
ConstEigenArrayMap<T> x_arr(x->data<T>(), C, N * sample_size);
ConstEigenArrayMap<T> d_y_arr(d_y->data<T>(), C, N * sample_size);
EigenArrayMap<T> d_x_arr(d_x->mutable_data<T>(ctx.GetPlace()), C,
......@@ -425,7 +427,7 @@ class BatchNormGradKernel<platform::CPUDeviceContext, T>
break;
}
default:
PADDLE_THROW("Unknown storage order: %s", tensor_format_str);
PADDLE_THROW("Unknown storage order: %s", data_layout_str);
}
}
};
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/batch_norm_op.h"
#include "paddle/framework/data_layout.h"
#include <cfloat>
#include "paddle/operators/math/math_function.h"
......@@ -22,12 +23,12 @@ namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using DataLayout = framework::DataLayout;
template <typename T>
using CudnnDataType = platform::CudnnDataType<T>;
void ExtractNCWHD(const framework::DDim &dims,
const TensorFormat &tensor_format, int *N, int *C, int *H,
int *W, int *D) {
void ExtractNCWHD(const framework::DDim &dims, const DataLayout &data_layout,
int *N, int *C, int *H, int *W, int *D) {
*N = dims[0];
if (dims.size() == 2) {
*C = dims[1];
......@@ -35,13 +36,13 @@ void ExtractNCWHD(const framework::DDim &dims,
*W = 1;
*D = 1;
} else {
*C = tensor_format == TensorFormat::NCHW ? dims[1] : dims[dims.size() - 1];
*H = tensor_format == TensorFormat::NCHW ? dims[2] : dims[1];
*C = data_layout == DataLayout::kNCHW ? dims[1] : dims[dims.size() - 1];
*H = data_layout == DataLayout::kNCHW ? dims[2] : dims[1];
*W = dims.size() > 3
? (tensor_format == TensorFormat::NCHW ? dims[3] : dims[2])
? (data_layout == DataLayout::kNCHW ? dims[3] : dims[2])
: 1;
*D = dims.size() > 4
? (tensor_format == TensorFormat::NCHW ? dims[4] : dims[3])
? (data_layout == DataLayout::kNCHW ? dims[4] : dims[3])
: 1;
}
}
......@@ -56,9 +57,9 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
double epsilon = static_cast<double>(ctx.Attr<float>("epsilon"));
const float momentum = ctx.Attr<float>("momentum");
const bool is_test = ctx.Attr<bool>("is_test");
const std::string tensor_format_str =
ctx.Attr<std::string>("tensor_format");
const TensorFormat tensor_format = StringToTensorFormat(tensor_format_str);
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
const DataLayout data_layout =
framework::StringToDataLayout(data_layout_str);
// Get the size for each dimension.
// NCHW [batch_size, in_channels, in_height, in_width]
......@@ -67,7 +68,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5,
"The Input dim size should be between 2 and 5");
int N, C, H, W, D;
ExtractNCWHD(x_dims, tensor_format, &N, &C, &H, &W, &D);
ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D);
// ------------------- cudnn descriptors ---------------------
cudnnTensorDescriptor_t data_desc_;
......@@ -93,7 +94,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
VLOG(1) << "Setting descriptors.";
std::vector<int> dims;
std::vector<int> strides;
if (tensor_format == TensorFormat::NCHW) {
if (data_layout == DataLayout::kNCHW) {
dims = {N, C, H, W, D};
strides = {C * H * W * D, H * W * D, W * D, D, 1};
} else {
......@@ -180,9 +181,9 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T>
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"It must use GPUPlace.");
double epsilon = static_cast<double>(ctx.Attr<float>("epsilon"));
const std::string tensor_format_str =
ctx.Attr<std::string>("tensor_format");
const TensorFormat tensor_format = StringToTensorFormat(tensor_format_str);
const std::string data_layout_str = ctx.Attr<std::string>("data_layout");
const DataLayout data_layout =
framework::StringToDataLayout(data_layout_str);
const auto *x = ctx.Input<Tensor>("X");
const auto *d_y = ctx.Input<Tensor>(framework::GradVarName("Y"));
const auto *scale = ctx.Input<Tensor>("Scale");
......@@ -192,7 +193,7 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T>
PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5,
"The Input dim size should be between 2 and 5");
int N, C, H, W, D;
ExtractNCWHD(x_dims, tensor_format, &N, &C, &H, &W, &D);
ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D);
PADDLE_ENFORCE_EQ(scale->dims().size(), 1UL);
PADDLE_ENFORCE_EQ(scale->dims()[0], C);
......@@ -219,7 +220,7 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T>
std::vector<int> dims;
std::vector<int> strides;
if (tensor_format == TensorFormat::NCHW) {
if (data_layout == DataLayout::kNCHW) {
dims = {N, C, H, W, D};
strides = {C * H * W * D, H * W * D, W * D, D, 1};
} else {
......
......@@ -19,21 +19,6 @@ limitations under the License. */
namespace paddle {
namespace operators {
enum TensorFormat {
NHWC = 0,
NCHW = 1,
};
inline TensorFormat StringToTensorFormat(const std::string& str) {
if (str == "NHWC" || str == "nhwc") {
return TensorFormat::NHWC;
} else if (str == "NCHW" || str == "nchw") {
return TensorFormat::NCHW;
} else {
PADDLE_THROW("Unknown storage order string: %s", str);
}
}
template <typename DeviceContext, typename T>
class BatchNormKernel : public framework::OpKernel<T> {
public:
......
......@@ -119,8 +119,8 @@ class BeamSearchDecodeInferShape : public framework::InferShapeBase {
class BeamSearchDecodeInferVarType : public framework::VarTypeInference {
public:
void operator()(const framework::OpDescBind& op_desc,
framework::BlockDescBind* block) const override {
void operator()(const framework::OpDesc& op_desc,
framework::BlockDesc* block) const override {
for (auto& o : op_desc.Output("SentenceIds")) {
block->Var(o)->SetType(framework::proto::VarDesc::LOD_TENSOR);
}
......
......@@ -52,14 +52,14 @@ class CastOpGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto grad = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto grad = new framework::OpDesc();
grad->SetType("cast");
grad->SetInput("X", OutputGrad("Out"));
grad->SetOutput("Out", InputGrad("X"));
grad->SetAttr("out_dtype", GetAttr("in_dtype"));
grad->SetAttr("in_dtype", GetAttr("out_dtype"));
return std::unique_ptr<framework::OpDescBind>(grad);
return std::unique_ptr<framework::OpDesc>(grad);
}
};
......
......@@ -98,8 +98,8 @@ class ConcatOpGrad : public framework::OperatorWithKernel {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(concat, ops::ConcatOp, ops::ConcatOpMaker, concat_grad,
ops::ConcatOpGrad)
REGISTER_OP_EX(concat, ops::ConcatOp, ops::ConcatOpMaker, concat_grad,
ops::ConcatOpGrad, false)
REGISTER_OP_CPU_KERNEL(concat,
ops::ConcatKernel<paddle::platform::CPUPlace, float>)
REGISTER_OP_CPU_KERNEL(concat_grad,
......
......@@ -65,7 +65,7 @@ class ConditionalBlockOp : public ConditionalOp {
scopes->front() = &scope.NewScope();
auto &cur_scope = *scopes->front();
auto *block = Attr<framework::BlockDescBind *>("sub_block");
auto *block = Attr<framework::BlockDesc *>("sub_block");
framework::Executor exec(dev_ctx);
exec.Run(*block->Program(), &cur_scope, block->ID(), false);
}
......@@ -86,7 +86,7 @@ class ConditionalBlockOpProtoMaker : public framework::OpProtoAndCheckerMaker {
"(std::vector<Scope*>) The step scope of conditional block. To "
"unify the conditional block, rnn and while op, the type of "
"scope is std::vector<Scope*>");
AddAttr<framework::BlockDescBind *>(
AddAttr<framework::BlockDesc *>(
"sub_block", "The step block of conditional block operator");
AddComment(R"DOC(Conditional block operator
......@@ -116,7 +116,7 @@ class ConditionalBlockGradOp : public ConditionalOp {
auto &scopes = scope_var->Get<std::vector<framework::Scope *>>();
framework::Scope &cur_scope = *scopes[0];
auto *block = Attr<framework::BlockDescBind *>("sub_block");
auto *block = Attr<framework::BlockDesc *>("sub_block");
framework::Executor exec(dev_ctx);
exec.Run(*block->Program(), &cur_scope, block->ID(), false);
......@@ -170,18 +170,19 @@ class ConditionalBlockGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto grad_op = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto grad_op = new framework::OpDesc();
grad_op->SetType("conditional_block_grad");
grad_op->SetInput("X", Input("X"));
grad_op->SetInput("Params", Input("Params"));
grad_op->SetInput("Out", Output("Out"));
grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
grad_op->SetInput("Scope", Output("Scope"));
grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
grad_op->SetOutput(framework::GradVarName("Params"), InputGrad("Params"));
grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X", false));
grad_op->SetOutput(framework::GradVarName("Params"),
InputGrad("Params", false));
grad_op->SetBlockAttr("sub_block", *this->grad_block_[0]);
return std::unique_ptr<framework::OpDescBind>(grad_op);
return std::unique_ptr<framework::OpDesc>(grad_op);
}
};
......
......@@ -21,8 +21,6 @@ class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker {
public:
CudnnConv2DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: Conv2DTransposeOpMaker(proto, op_checker) {
AddAttr<std::vector<int>>("dilations", "dilations of convolution operator.")
.SetDefault({1, 1});
AddAttr<int>("workspace_size_MB",
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
......@@ -37,8 +35,6 @@ class CudnnConv3DTransposeOpMaker : public Conv3DTransposeOpMaker {
public:
CudnnConv3DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: Conv3DTransposeOpMaker(proto, op_checker) {
AddAttr<std::vector<int>>("dilations", "dilations of convolution operator.")
.SetDefault({1, 1, 1});
AddAttr<int>("workspace_size_MB",
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
......
......@@ -29,6 +29,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
auto filter_dims = ctx->GetInputDim("Filter");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5,
"ConvTransposeOp intput should be 4-D or 5-D tensor.");
......@@ -41,14 +42,18 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE_EQ(paddings.size(), strides.size(),
"ConvTransposeOp paddings dimension and strides "
"dimension should be the same.");
PADDLE_ENFORCE_EQ(paddings.size(), dilations.size(),
"ConvTransposeOp paddings dimension and dilations "
"dimension should be the same.");
PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[0],
"In ConvTransposeOp, The input channel should be the same "
"as the number of filters.");
std::vector<int64_t> output_shape({in_dims[0], filter_dims[1]});
for (size_t i = 0; i < strides.size(); ++i) {
auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
output_shape.push_back((in_dims[i + 2] - 1) * strides[i] - 2 * paddings[i] +
filter_dims[i + 2]);
filter_extent);
}
ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
}
......@@ -73,6 +78,12 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto,
AddOutput("Output",
"(Tensor) The output tensor of convolution transpose operator. "
"The format of output tensor is also NCHW.");
AddAttr<std::vector<int>>("dilations",
"(vector<int> default:{1, 1}), the "
"dilations(h_dilation, w_dilation) of convolution "
"transpose operator.")
.SetDefault({1, 1});
AddAttr<std::vector<int>>(
"strides",
"(vector<int> default:{1, 1}), the strides(h_stride, w_stride) of "
......@@ -87,7 +98,7 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto,
Convolution2D Transpose Operator.
The convolution transpose operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the
and dilations, strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and output(Output) are in NCHW format. Where N is batchsize, C is the
number of channels, H is the height of the feature, and W is the width of the feature.
......@@ -136,6 +147,13 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(OpProto* proto,
"Where N is batch size, C is "
"the number of channels, D is the depth of the feature, H is the "
"height of the feature, and W is the width of the feature.");
AddAttr<std::vector<int>>(
"dilations",
"(vector<int> default:{1, 1, 1}), the "
"dilations(d_dilation,h_dilation, w_dilation) of convolution "
"transpose operator.")
.SetDefault({1, 1, 1});
AddAttr<std::vector<int>>("strides",
"(vector<int> default:{1, 1, 1}), the "
"strides{d_stride, h_stride, w_stride} of "
......@@ -149,7 +167,7 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(OpProto* proto,
Convolution3D Transpose Operator.
The convolution transpose operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the
and dilations, strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Input(Input) and output(Output) are in NCDHW format. Where N is batch size, C is the
number of channels, D is the depth of the feature, H is the height of the feature,
......
......@@ -61,6 +61,7 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = context.Attr<std::vector<int>>("dilations");
// groups will alway be disabled in conv2dtranspose.
const int batch_size = static_cast<int>(input->dims()[0]);
......@@ -113,7 +114,6 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
math::Col2ImFunctor<math::ColFormat::kCFO, DeviceContext, T> col2im;
math::Col2VolFunctor<DeviceContext, T> col2vol;
std::vector<int> dilations({1, 1, 1});
// convolution transpose: gemm + col2im or col2vol (similar to conv-backward
// on input)
......@@ -165,6 +165,7 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = context.Attr<std::vector<int>>("dilations");
const int batch_size = static_cast<int>(input->dims()[0]);
......@@ -219,7 +220,6 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
math::Im2ColFunctor<math::ColFormat::kCFO, DeviceContext, T> im2col;
math::Vol2ColFunctor<DeviceContext, T> vol2col;
std::vector<int> dilations({1, 1, 1});
if (input_grad) {
input_grad->mutable_data<T>(context.GetPlace());
......
......@@ -20,25 +20,57 @@ namespace detail {
Status SendRecvServerImpl::SendVariable(ServerContext *context,
const VariableMessage *in_var,
VariableMessage *out_var) {
framework::LoDTensor t;
// TODO(typhoonzero): desirealize in_tensor and run pserver network.
VoidMessage *out_var) {
// TODO(typhoonzero): support different variable types.
std::istringstream iss(in_var->serialized());
framework::LoDTensor t;
framework::DeserializeFromStream(iss, &t);
lodtensor_queue_.Push(std::move(t));
// Block util the sub graph is done.
t = lodtensor_return_queue_.Pop();
TensorWithName tensor_with_name =
std::make_pair(in_var->varname(), std::move(t));
var_recv_queue_.Push(std::move(tensor_with_name));
return Status::OK;
}
Status SendRecvServerImpl::GetVariable(ServerContext *context,
const VariableMessage *in_var,
VariableMessage *out_var) {
std::string get_var_name = in_var->varname();
auto *var = scope_->FindVar(get_var_name);
auto tensor = var->Get<framework::LoDTensor>();
std::ostringstream oss;
// FIXME(typhoonzero): get context from op.
framework::SerializeToStream(oss, t, platform::CPUDeviceContext());
framework::SerializeToStream(oss, tensor, platform::CPUDeviceContext());
std::string *varname = out_var->mutable_varname();
*varname = in_var->varname();
*varname = get_var_name;
std::string *serialized = out_var->mutable_serialized();
*serialized = oss.str();
return Status::OK;
}
Status SendRecvServerImpl::Wait(ServerContext *context,
const VoidMessage *in_var,
VoidMessage *out_var) {
{
std::unique_lock<std::mutex> lock(this->mutex_);
condition_.wait(lock, [=] { return this->done_ == true; });
}
return Status::OK;
}
void SendRecvServerImpl::Reset() {
std::lock_guard<std::mutex> lock(this->mutex_);
done_ = false;
}
void SendRecvServerImpl::Done() {
{
std::lock_guard<std::mutex> lock(this->mutex_);
done_ = true;
}
condition_.notify_all();
}
} // namespace detail
} // namespace operators
} // namespace paddle
......@@ -19,10 +19,10 @@ namespace operators {
namespace detail {
bool RPCClient::SendVariable(const framework::Scope& scope,
const std::string& inname,
const std::string& outname) {
const std::string& inname) {
ClientContext context;
VariableMessage msg, out_msg;
VariableMessage msg;
VoidMessage out_msg;
// FIXME(typhoonzero): pass device context to here.
auto ctx = platform::CPUDeviceContext();
auto* var = scope.FindVar(inname);
......@@ -37,9 +37,26 @@ bool RPCClient::SendVariable(const framework::Scope& scope,
msg.set_serialized(oss.str());
Status status = stub_->SendVariable(&context, msg, &out_msg);
if (!status.ok()) {
LOG(ERROR) << "gRPC error: " << status.error_message();
return false;
}
std::istringstream iss(out_msg.serialized());
return true;
}
bool RPCClient::GetVariable(const framework::Scope& scope,
const std::string& outname) {
ClientContext context;
VariableMessage call_msg, ret_msg;
call_msg.set_varname(outname);
auto ctx = platform::CPUDeviceContext();
Status status = stub_->GetVariable(&context, call_msg, &ret_msg);
if (!status.ok()) {
LOG(ERROR) << "gRPC error: " << status.error_message();
return false;
}
std::istringstream iss(ret_msg.serialized());
framework::LoDTensor ret_tensor;
framework::DeserializeFromStream(iss, &ret_tensor);
auto* outvar = scope.FindVar(outname);
......@@ -49,6 +66,12 @@ bool RPCClient::SendVariable(const framework::Scope& scope,
return true;
}
void RPCClient::Wait() {
ClientContext context;
VoidMessage call_msg, ret_msg;
stub_->Wait(&context, call_msg, &ret_msg);
}
} // namespace detail
} // namespace operators
} // namespace paddle
......@@ -19,7 +19,12 @@ package sendrecv;
service SendRecvService {
// For parameter server round-robin like hashing, do not split tensors.
// Send and recv only one tensor
rpc SendVariable(VariableMessage) returns (VariableMessage) {}
// TODO(typhoonzero): add streaming API
rpc SendVariable(VariableMessage) returns (VoidMessage) {}
// Argument VariableMessage for GetVariable should only contain varname.
rpc GetVariable(VariableMessage) returns (VariableMessage) {}
// wait for one execution of the program
rpc Wait(VoidMessage) returns (VoidMessage) {}
}
// VariableMessage is serialized paddle variable message.
......
......@@ -20,10 +20,6 @@
#include "paddle/framework/selected_rows.h"
#include "paddle/operators/detail/simple_block_queue.h"
// #include <grpc++/channel.h>
// #include <grpc++/client_context.h>
// #include <grpc++/create_channel.h>
// #include <grpc++/security/credentials.h>
#include "paddle/operators/detail/send_recv.grpc.pb.h"
#include "paddle/operators/detail/send_recv.pb.h"
......@@ -48,24 +44,32 @@ namespace paddle {
namespace operators {
namespace detail {
typedef std::pair<std::string, framework::LoDTensor> TensorWithName;
class SendRecvServerImpl final : public SendRecvService::Service {
public:
explicit SendRecvServerImpl() {}
Status SendVariable(ServerContext *context, const VariableMessage *in_var,
VoidMessage *out_var) override;
Status GetVariable(ServerContext *context, const VariableMessage *in_var,
VariableMessage *out_var) override;
Status Wait(ServerContext *context, const VoidMessage *in_var,
VoidMessage *out_var) override;
void Reset();
void Done();
void SetScope(framework::Scope *scope) { scope_ = scope; };
const framework::LoDTensor Get() { return this->lodtensor_queue_.Pop(); }
void Push(const framework::LoDTensor &tensor) {
this->lodtensor_return_queue_.Push(tensor);
}
const TensorWithName Get() { return this->var_recv_queue_.Pop(); }
private:
SimpleBlockQueue<framework::LoDTensor> lodtensor_queue_;
SimpleBlockQueue<framework::LoDTensor> lodtensor_return_queue_;
SimpleBlockQueue<framework::SelectedRows> selected_rows_queue_;
SimpleBlockQueue<framework::SelectedRows> selected_rows_return_queue_;
// received variable from RPC, operators fetch variable from this queue.
SimpleBlockQueue<TensorWithName> var_recv_queue_;
framework::Scope *scope_;
// condition of the sub program
std::mutex mutex_;
bool done_;
std::condition_variable condition_;
};
// RPCClient is a class to send tensors to pserver sub-network
......@@ -75,8 +79,9 @@ class RPCClient {
RPCClient(std::shared_ptr<Channel> channel)
: stub_(SendRecvService::NewStub(channel)) {}
bool SendVariable(const framework::Scope &scope, const std::string &inname,
const std::string &outname);
bool SendVariable(const framework::Scope &scope, const std::string &inname);
bool GetVariable(const framework::Scope &scope, const std::string &outname);
void Wait();
private:
std::unique_ptr<SendRecvService::Stub> stub_;
......
......@@ -71,7 +71,7 @@ class GPUDropoutKernel : public framework::OpKernel<T> {
auto M = EigenMatrix<T>::Reshape(*mask, 1);
Y.device(place) = X * M;
} else {
Y.device(place) = X * dropout_prob;
Y.device(place) = X * (1.0f - dropout_prob);
}
}
};
......
......@@ -57,7 +57,7 @@ class CPUDropoutKernel : public framework::OpKernel<T> {
auto Y = EigenMatrix<T>::Reshape(*y, 1);
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
Y.device(place) = X * dropout_prob;
Y.device(place) = X * (1.0f - dropout_prob);
}
}
};
......
......@@ -103,11 +103,13 @@ class MidWiseTransformIterator<T, platform::CPUDeviceContext> {
MidWiseTransformIterator<T, platform::CPUDeviceContext>& operator++() {
++j_;
i_ = j_ / post_;
if (UNLIKELY(i_ == n_)) {
if (UNLIKELY(j_ == post_)) {
++i_;
j_ = 0;
if (UNLIKELY(i_ == n_)) {
i_ = 0;
}
}
return *this;
}
......@@ -125,10 +127,10 @@ class MidWiseTransformIterator<T, platform::CPUDeviceContext> {
private:
const T* ptr_;
int i_;
int64_t i_;
int64_t j_;
int64_t n_;
int post_;
int64_t post_;
};
#ifdef __NVCC__
......
......@@ -24,10 +24,10 @@ class FillZerosLikeOp : public framework::OperatorWithKernel {
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of FillZerosLikeOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Y"),
"Output(Y) of FillZerosLikeOp should not be null.");
ctx->SetOutputDim("Y", ctx->GetInputDim("X"));
ctx->ShareLoD("X", /*->*/ "Y");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of FillZerosLikeOp should not be null.");
ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
ctx->ShareLoD("X", /*->*/ "Out");
}
};
......@@ -36,7 +36,7 @@ class FillZerosLikeOpMaker : public framework::OpProtoAndCheckerMaker {
FillZerosLikeOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input of fill-zeros-like op.");
AddOutput("Y", "The variable will be filled up with zeros.");
AddOutput("Out", "The variable will be filled up with zeros.");
AddComment(R"DOC(
FillZerosLike Operator.
......
......@@ -23,7 +23,7 @@ template <typename DeviceContext, typename T>
class FillZerosLikeKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* out = context.Output<framework::Tensor>("Y");
auto* out = context.Output<framework::Tensor>("Out");
out->mutable_data<T>(context.GetPlace());
math::SetConstant<DeviceContext, T> setter;
......
......@@ -93,13 +93,13 @@ class IncrementGradOpMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto *grad_op = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDesc();
grad_op->SetType("increment");
grad_op->SetInput("X", Output("Out"));
grad_op->SetOutput("Out", Input("X"));
grad_op->SetAttr("step", -boost::get<float>(GetAttr("step")));
return std::unique_ptr<framework::OpDescBind>(grad_op);
return std::unique_ptr<framework::OpDesc>(grad_op);
}
};
......
......@@ -63,8 +63,8 @@ class LoDRankTableInferShape : public framework::InferShapeBase {
class LoDRankTableInferVarType : public framework::VarTypeInference {
public:
void operator()(const framework::OpDescBind &op_desc,
framework::BlockDescBind *block) const override {
void operator()(const framework::OpDesc &op_desc,
framework::BlockDesc *block) const override {
for (auto &o : op_desc.Output("Out")) {
block->FindRecursiveOrCreateVar(o)->SetType(
framework::proto::VarDesc::LOD_RANK_TABLE);
......
......@@ -127,8 +127,8 @@ class LoDTensorToArrayInferShape : public framework::InferShapeBase {
class LoDTensorToArrayInferVarType : public framework::VarTypeInference {
public:
void operator()(const framework::OpDescBind &op_desc,
framework::BlockDescBind *block) const override {
void operator()(const framework::OpDesc &op_desc,
framework::BlockDesc *block) const override {
for (auto &out_var : op_desc.Output("Out")) {
block->Var(out_var)->SetType(framework::proto::VarDesc::LOD_TENSOR_ARRAY);
}
......@@ -140,14 +140,14 @@ class LoDTensorToArrayGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto *grad_op = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDesc();
grad_op->SetType("array_to_lod_tensor");
grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetInput("RankTable", Input("RankTable"));
grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(grad_op);
return std::unique_ptr<framework::OpDesc>(grad_op);
}
};
......
......@@ -108,8 +108,8 @@ class LookupTableOpGrad : public framework::OperatorWithKernel {
class LookupTableOpGradVarTypeInference : public framework::VarTypeInference {
public:
void operator()(const framework::OpDescBind& op_desc,
framework::BlockDescBind* block) const override {
void operator()(const framework::OpDesc& op_desc,
framework::BlockDesc* block) const override {
auto out_var_name = op_desc.Output(framework::GradVarName("W")).front();
auto attr = op_desc.GetAttr("is_sparse");
bool is_sparse = boost::get<bool>(attr);
......
......@@ -61,14 +61,13 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
const T* im_data = im.data<T>();
T* col_data = col->data<T>();
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % filter_width;
int h_offset = (c / filter_width) % filter_height;
int c_im = c / filter_width / filter_height;
int c_im = c / (filter_width * filter_height);
for (int h = 0; h < col_height; ++h) {
for (int w = 0; w < col_width; ++w) {
int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
for (int w = 0; w < col_width; ++w) {
int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1];
int col_idx = (c * col_height + h) * col_width + w;
int im_idx = (im_row_idx + c_im * im_height) * im_width + im_col_idx;
......@@ -130,16 +129,14 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % filter_width;
int h_offset = (c / filter_width) % filter_height;
int c_im = c / filter_width / filter_height;
int c_im = c / (filter_width * filter_height);
for (int h = 0; h < col_height; ++h) {
for (int w = 0; w < col_width; ++w) {
int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0];
for (int w = 0; w < col_width; ++w) {
int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1];
if ((im_row_idx) >= 0 && (im_row_idx) < im_height &&
(im_col_idx) >= 0 && (im_col_idx) < im_width) {
im_row_idx += c_im * im_height;
im_data[im_row_idx * im_width + im_col_idx] +=
im_data[(im_row_idx + c_im * im_height) * im_width + im_col_idx] +=
col_data[(c * col_height + h) * col_width + w];
}
}
......@@ -199,12 +196,13 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
for (int channel = 0; channel < im_channels; ++channel) {
for (int filter_row_idx = 0; filter_row_idx < filter_height;
++filter_row_idx) {
for (int filter_col_idx = 0; filter_col_idx < filter_width;
++filter_col_idx) {
int im_row_offset =
col_row_idx * stride[0] + filter_row_idx - padding[0];
for (int filter_col_idx = 0; filter_col_idx < filter_width;
++filter_col_idx) {
int im_col_offset =
col_col_idx * stride[1] + filter_col_idx - padding[1];
int col_offset =
((((col_row_idx)*col_width + col_col_idx) * im_channels +
channel) *
......@@ -271,12 +269,13 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
for (int channel = 0; channel < im_channels; ++channel) {
for (int filter_row_idx = 0; filter_row_idx < filter_height;
++filter_row_idx) {
for (int filter_col_idx = 0; filter_col_idx < filter_width;
++filter_col_idx) {
int im_row_offset =
col_row_idx * stride[0] + filter_row_idx - padding[0];
for (int filter_col_idx = 0; filter_col_idx < filter_width;
++filter_col_idx) {
int im_col_offset =
col_col_idx * stride[1] + filter_col_idx - padding[1];
int col_offset =
(((col_row_idx * col_width + col_col_idx) * im_channels +
channel) *
......@@ -284,6 +283,7 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
filter_row_idx) *
filter_width +
filter_col_idx;
if (im_row_offset >= 0 && im_row_offset < im_height &&
im_col_offset >= 0 && im_col_offset < im_width) {
int im_offset =
......
......@@ -67,18 +67,45 @@ void RowwiseAdd<DeviceContext, T>::operator()(const DeviceContext& context,
template <typename DeviceContext, typename T>
void ColwiseSum<DeviceContext, T>::operator()(const DeviceContext& context,
const framework::Tensor& input,
framework::Tensor* vector) {
framework::Tensor* out) {
auto in_dims = input.dims();
auto size = input.numel() / in_dims[0];
PADDLE_ENFORCE_EQ(vector->numel(), size);
PADDLE_ENFORCE_EQ(out->numel(), size);
auto vec = framework::EigenMatrix<T>::From(*vector);
auto in = framework::EigenMatrix<T>::From(input);
Eigen::array<int, 2> shape({{1, static_cast<int>(size)}});
vec.reshape(shape).device(*context.eigen_device()) =
in.sum(Eigen::array<int, 1>({{0}})).reshape(shape);
auto vec = framework::EigenVector<T>::Flatten(*out);
vec.device(*context.eigen_device()) = in.sum(Eigen::array<int, 1>({{0}}));
}
// Specialize for CPU, since Eigen implement a general reduce. However,
// colwise-sum can be easily implemented. General reduce has a huge overhead in
// CPU
template <typename T>
class ColwiseSum<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input, framework::Tensor* out) {
auto& in_dims = input.dims();
auto height = in_dims[0];
auto size = in_dims[1];
PADDLE_ENFORCE_EQ(out->numel(), size);
T* out_buf = out->mutable_data<T>(out->place());
const T* in_buf = input.data<T>();
for (size_t i = 0; i < height; ++i) {
for (size_t j = 0; j < size; ++j) {
if (i == 0) {
out_buf[j] = in_buf[i * size + j];
} else {
out_buf[j] += in_buf[i * size + j];
}
}
}
}
};
} // namespace math
} // namespace operators
} // namespace paddle
......@@ -60,13 +60,13 @@ class MeanGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto* grad_op = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto* grad_op = new framework::OpDesc();
grad_op->SetType("mean_grad");
grad_op->SetInput("X", Input("X"));
grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
return std::unique_ptr<framework::OpDescBind>(grad_op);
return std::unique_ptr<framework::OpDesc>(grad_op);
}
};
......
......@@ -161,15 +161,15 @@ class MergeLoDTensorGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto *grad_op = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDesc();
grad_op->SetType("split_lod_tensor");
grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetInput("Mask", Input("Mask"));
grad_op->SetOutput("OutTrue", InputGrad("InTrue"));
grad_op->SetOutput("OutFalse", InputGrad("InFalse"));
grad_op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(grad_op);
return std::unique_ptr<framework::OpDesc>(grad_op);
}
};
......
......@@ -70,12 +70,11 @@ class MinusGradMaker : public framework::GradOpDescMakerBase {
public:
using framework::GradOpDescMakerBase::GradOpDescMakerBase;
std::vector<std::unique_ptr<framework::OpDescBind>> operator()()
const override {
std::vector<std::unique_ptr<framework::OpDescBind>> ops;
std::vector<std::unique_ptr<framework::OpDesc>> operator()() const override {
std::vector<std::unique_ptr<framework::OpDesc>> ops;
auto x_g = InputGrad("X");
if (!x_g.empty()) {
auto *x_g_op = new framework::OpDescBind();
auto *x_g_op = new framework::OpDesc();
x_g_op->SetType("scale");
x_g_op->SetInput("X", OutputGrad("Out"));
x_g_op->SetOutput("Out", x_g);
......@@ -85,7 +84,7 @@ class MinusGradMaker : public framework::GradOpDescMakerBase {
auto y_g = InputGrad("Y");
if (!y_g.empty()) {
auto *y_g_op = new framework::OpDescBind();
auto *y_g_op = new framework::OpDesc();
y_g_op->SetType("scale");
y_g_op->SetInput("X", OutputGrad("Out"));
y_g_op->SetOutput("Out", y_g);
......
......@@ -73,39 +73,50 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
public:
MulOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of mul op");
AddInput("Y", "The second input of mul op");
AddOutput("Out", "The output of mul op");
AddInput("X", "(Tensor), The first input tensor of mul op.");
AddInput("Y", "(Tensor), The second input tensor of mul op.");
AddOutput("Out", "(Tensor), The output tensor of mul op.");
AddAttr<int>(
"x_num_col_dims",
"(int, default 1) "
R"DOC(mul_op can take tensors with more than two dimensions as input `X`,
in that case, tensors will be reshaped to a matrix. The matrix's first
dimension(column length) will be the product of tensor's last
`num_col_dims` dimensions, and the matrix's second dimension(row length)
will be the product of tensor's first `rank - num_col_dims` dimensions.
R"DOC((int, default 1), The mul_op can take tensors with more than two
dimensions as its inputs. If the input $X$ is a tensor with more
than two dimensions, $X$ will be flattened into a two-dimensional
matrix first. The flattening rule is: the first `num_col_dims`
will be flattened to form the first dimension of the final matrix
(the height of the matrix), and the rest `rank(X) - num_col_dims`
dimensions are flattened to form the second dimension of the final
matrix (the width of the matrix). As a result, height of the
flattened matrix is equal to the product of $X$'s first
`x_num_col_dims` dimensions' sizes, and width of the flattened
matrix is equal to the product of $X$'s last `rank(x) - num_col_dims`
dimensions' size. For example, suppose $X$ is a 6-dimensional
tensor with the shape [2, 3, 4, 5, 6], and `x_num_col_dims` = 3.
Thus, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] =
[24, 30].
)DOC")
.SetDefault(1)
.EqualGreaterThan(1);
AddAttr<int>(
"y_num_col_dims",
"(int, default 1) "
R"DOC(mul_op can take tensors with more than two dimensions as input `Y`,
in that case, tensors will be reshaped to a matrix. Just like input `X`.
R"DOC((int, default 1), The mul_op can take tensors with more than two,
dimensions as its inputs. If the input $Y$ is a tensor with more
than two dimensions, $Y$ will be flattened into a two-dimensional
matrix first. The attribute `y_num_col_dims` determines how $Y$ is
flattened. See comments of `x_num_col_dims` for more details.
)DOC")
.SetDefault(1)
.EqualGreaterThan(1);
AddComment(R"DOC(
Mul Operator.
This operator is used to perform matrix multiplication for input X and Y.
This operator is used to perform matrix multiplication for input $X$ and $Y$.
The equation is:
$$Out = X * Y$$
Both the input `X` and `Y` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD information with input `X`.
Both the input $X$ and $Y$ can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD information with input $X$.
)DOC");
}
......
......@@ -65,7 +65,7 @@ class NCCLTester : public ::testing::Test {
}
void NCCLInitOp() {
std::unique_ptr<f::OpDescBind> op1(new f::OpDescBind);
std::unique_ptr<f::OpDesc> op1(new f::OpDesc);
op1->SetType("ncclInit");
op1->SetOutput("Communicator", {"comm"});
......@@ -81,10 +81,9 @@ class NCCLTester : public ::testing::Test {
}
template <class T>
void PerThreadProgram(int gpu_id, const f::OpDescBind &op_desc,
f::Scope *scope) {
void PerThreadProgram(int gpu_id, const f::OpDesc &op_desc, f::Scope *scope) {
std::unique_lock<std::mutex> lk(mu);
const f::OpDescBind *op1 = &op_desc;
const f::OpDesc *op1 = &op_desc;
p::GPUPlace place(gpu_id);
auto &ctx = dev_ctxs.at(gpu_id);
......@@ -125,7 +124,7 @@ class NCCLTester : public ::testing::Test {
// ncclInitOp with desc
TEST(NCCL, ncclInitOp) {
std::unique_ptr<f::OpDescBind> op_desc(new f::OpDescBind);
std::unique_ptr<f::OpDesc> op_desc(new f::OpDesc);
op_desc->SetType("ncclInit");
op_desc->SetOutput("Communicator", {"x1"});
......@@ -145,7 +144,7 @@ TEST(NCCL, ncclInitOp) {
// ncclAllReduceOp with desc
TEST_F(NCCLTester, ncclAllReduceOp) {
std::unique_ptr<f::OpDescBind> op2(new f::OpDescBind);
std::unique_ptr<f::OpDesc> op2(new f::OpDesc);
op2->SetType("ncclAllReduce");
op2->SetInput("X", {"st"});
op2->SetInput("Communicator", {"comm"});
......@@ -192,7 +191,7 @@ TEST_F(NCCLTester, ncclAllReduceOp) {
// ncclReduceOp with desc
TEST_F(NCCLTester, ncclReduceOp) {
std::unique_ptr<f::OpDescBind> op2(new f::OpDescBind);
std::unique_ptr<f::OpDesc> op2(new f::OpDesc);
const int kRoot = 0;
op2->SetType("ncclReduce");
op2->SetInput("X", {"st"});
......@@ -240,7 +239,7 @@ TEST_F(NCCLTester, ncclReduceOp) {
// ncclBcastOp with desc
TEST_F(NCCLTester, ncclBcastOp) {
std::unique_ptr<f::OpDescBind> op2(new f::OpDescBind);
std::unique_ptr<f::OpDesc> op2(new f::OpDesc);
const int kRoot = 5;
op2->SetType("ncclBcast");
op2->SetInput("X", {"st"});
......
# Standard Markdown Format for Operators
The following should be the standard format for documentation for all the operators that will get rendered in the `html`:
```
Operator Name (In PaddlePaddle)
Operator Name (Standard)
Operator description.
LaTeX equation of how the operator performs an update.
The signature of the operator.
```
Each section mentioned above has been covered in further detail in the rest of the document.
# PaddlePaddle Operator Name
This should be in all small letters, in case of multiple words, we separate them with an underscore. For example:
`array to lod tensor` should be written as `array_to_lod_tensor`.
This naming convention should be standard across all PaddlePaddle operators.
# Standard Operator Name
This is the standard name of the operator as used in the community. The general standard is usually:
- Standard abbreviations like `SGD` are written in all capital letters.
- Operator names that have multiple words inside a single word use `camelCase` (capitalize word boundaries inside of a word).
- Keep numbers inside a word as is, with no boundary delimiters.
- Follow the name of the operator with the keyword: `Activation Operator.`
# Operator description
This section should contain the description of what the operator does, including the operation performed, the literature from where it comes and was introduced first, and other important details. The relevant paper/article including the hyperlink should be cited in this section.
# LaTeX equation
This section should contain an overall equation of the update or operation that the operator performs. The variables used in the equation should follow the naming convention of operators as described [here](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/name_convention.md). Two words in the same word should be separated by an underscore (`_`).
# The signature
This section describes the signature of the operator. A list of Inputs and Outputs, each of which have a small description of what the variable represents and the type of variable. The variable names follow the `CamelCase` naming convention. The proposed format for this is:
`Section :
VariableName : (VariableType) VariableDescription
...
...
`
The following example for an `sgd` operator covers the above mentioned sections as they would ideally look like in the `html`:
```
sgd
SGD operator
This operator implements one step of the stochastic gradient descent algorithm.
param_out = param_learning_rate * grad
Inputs:
Param : (Tensor) Input parameter
LearningRate : (Tensor) Learning rate of SGD
Grad : (Tensor) Input gradient
Outputs:
ParamOut : (Tensor) Output parameter
```
......@@ -116,14 +116,14 @@ class PadOpGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto* bind = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto* bind = new framework::OpDesc();
bind->SetInput("X", Input("X"));
bind->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
bind->SetOutput(framework::GradVarName("X"), InputGrad("X"));
bind->SetAttrMap(Attrs());
bind->SetType("pad_grad");
return std::unique_ptr<framework::OpDescBind>(bind);
return std::unique_ptr<framework::OpDesc>(bind);
}
};
......
......@@ -234,7 +234,7 @@ class RecurrentOp : public RecurrentBase {
auto reverse = Attr<bool>(kReverse);
framework::Executor executor(dev_ctx);
auto *block = Attr<framework::BlockDescBind *>(kStepBlock);
auto *block = Attr<framework::BlockDesc *>(kStepBlock);
auto *program = block->Program();
for (size_t i = 0; i < seq_len; ++i) {
......@@ -317,7 +317,7 @@ class RecurrentGradOp : public RecurrentBase {
auto reverse = Attr<bool>(kReverse);
framework::Executor executor(dev_ctx);
auto *block = Attr<framework::BlockDescBind *>(kStepBlock);
auto *block = Attr<framework::BlockDesc *>(kStepBlock);
auto *program = block->Program();
for (size_t step_id = 0; step_id < seq_len; ++step_id) {
......@@ -522,8 +522,7 @@ The ex-state means the state value in the ex-timestep or the previous time step
string::Sprintf(
"The state variable names. [%s, %s, %s] must be the same order",
kExStates, kStates, kInitStateGrads));
AddAttr<framework::BlockDescBind *>(kStepBlock,
"The step block inside RNN");
AddAttr<framework::BlockDesc *>(kStepBlock, "The step block inside RNN");
AddAttr<bool>(kReverse, R"DOC(Calculate RNN reversely or not.
By default reverse=False
......@@ -565,13 +564,13 @@ class RecurrentGradOpDescMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
virtual std::unique_ptr<framework::OpDescBind> Apply() const {
auto *grad = new framework::OpDescBind();
virtual std::unique_ptr<framework::OpDesc> Apply() const {
auto *grad = new framework::OpDesc();
grad->SetType("recurrent_grad");
for (auto &input_param : this->InputNames()) {
grad->SetInput(input_param, this->Input(input_param));
grad->SetOutput(framework::GradVarName(input_param),
this->InputGrad(input_param));
this->InputGrad(input_param, false));
}
for (auto &output_param : this->OutputNames()) {
......@@ -588,7 +587,7 @@ class RecurrentGradOpDescMaker : public framework::SingleGradOpDescMaker {
grad->SetAttrMap(this->Attrs());
grad->SetBlockAttr(kStepBlock, *grad_block_[0]);
return std::unique_ptr<framework::OpDescBind>(grad);
return std::unique_ptr<framework::OpDesc>(grad);
}
};
......
......@@ -24,6 +24,7 @@
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/proto_desc.h"
#include "paddle/operators/detail/send_recv_impl.h"
#include "paddle/operators/detail/simple_block_queue.h"
......@@ -61,29 +62,76 @@ class RecvOp : public framework::OperatorBase {
server_thread_->join();
}
std::string GetGradVarNameForTrainer(const std::string &varname) const {
if (grads_counter_.find(varname) == grads_counter_.end()) {
grads_counter_[varname] = 0;
}
char ret[256];
snprintf(ret, sizeof(ret), "%s.trainer_%d", varname.c_str(),
grads_counter_[varname]++);
return std::string(ret);
}
void Run(const framework::Scope &scope,
const platform::DeviceContext &dev_ctx) const override {
// blocking get one var from client.
const framework::LoDTensor &t = rpc_service_->Get();
// FIXME(typhoonzero): no new scopes for every run.
framework::Scope &recv_scope = scope.NewScope();
// set graph input var
auto *var = recv_scope.Var(Input("RX"));
rpc_service_->SetScope(&recv_scope);
auto param_list = Attr<std::vector<std::string>>("ParamList");
auto grad_list = Attr<std::vector<std::string>>("GradList");
auto trainer_count = Attr<int>("Trainers");
size_t param_count = param_list.size();
rpc_service_->Reset();
// TODO(typhoonzero): change this to a while_op for every cluster-batch.
while (true) {
// Get from multiple trainers, we don't care about order in which
// the gradient arrives, just add suffix 0~n then average the gradient.
for (size_t i = 0; i < param_count * trainer_count; ++i) {
// blocking get one var from client.
const detail::TensorWithName &v = rpc_service_->Get();
auto grad_var_name = v.first;
auto it = std::find(grad_list.begin(), grad_list.end(), grad_var_name);
std::string param_var_name;
if (it != grad_list.end()) {
param_var_name = param_list[it - grad_list.begin()];
} else {
LOG(ERROR) << "grad have no paired param found!";
}
VLOG(3) << "recved grad: " << grad_var_name
<< " updating param: " << param_var_name;
auto *merged_grad = recv_scope.FindVar(grad_var_name);
if (merged_grad == nullptr) {
// create output of merged var.
auto merged_var = recv_scope.Var(grad_var_name);
merged_var->GetMutable<framework::LoDTensor>();
}
if (trainer_count > 1) {
grad_var_name = this->GetGradVarNameForTrainer(grad_var_name);
}
auto *var = recv_scope.Var(grad_var_name);
auto *tensor = var->GetMutable<framework::LoDTensor>();
// FIXME(typhoonzero): do not copy
framework::CopyFrom(t, dev_ctx.GetPlace(), dev_ctx, tensor);
framework::CopyFrom(v.second, dev_ctx.GetPlace(), dev_ctx, tensor);
}
rpc_service_->Reset();
std::string program_str = Attr<std::string>("OptimizeProgram");
framework::ProgramDesc program_desc;
framework::proto::ProgramDesc program_desc;
program_desc.ParseFromString(program_str);
framework::ProgramDescBind program(program_desc);
framework::ProgramDesc program(program_desc);
framework::Executor executor(dev_ctx);
// Run sub graph to get optimized tensor
try {
executor.Run(program, &recv_scope, 0, /*global_block*/
false /*create_local_scope*/);
auto *out_var = recv_scope.FindVar("Out");
// push back
rpc_service_->Push(out_var->Get<framework::LoDTensor>());
false /*create_local_scope*/, false /*create_vars*/);
} catch (std::exception &e) {
LOG(ERROR) << "run sub program error " << e.what();
}
rpc_service_->Done();
grads_counter_.clear();
} // while(true)
}
protected:
......@@ -93,13 +141,14 @@ class RecvOp : public framework::OperatorBase {
// grpc send/recv service implement to register.
std::shared_ptr<detail::SendRecvServerImpl> rpc_service_;
std::shared_ptr<std::thread> server_thread_;
mutable std::unordered_map<std::string, int> grads_counter_;
};
class RecvOpMaker : public framework::OpProtoAndCheckerMaker {
public:
RecvOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("RX", "(Tensor) Input tensor to be saved");
AddInput("RX", "(Tensor) Input tensor to be optimized").AsDuplicable();
AddComment(R"DOC(
Recv operator
......@@ -112,6 +161,17 @@ This operator will recv tensor from send_op
.AddCustomChecker([](const std::string &ip) { return !ip.empty(); });
AddAttr<std::string>("OptimizeProgram", "type string",
"Serialized ProgramDesc string for recv to run.");
AddAttr<std::vector<std::string>>(
"ParamList", "type list of string",
"grad->param name mapping to find which param to optimize.")
.SetDefault({});
AddAttr<std::vector<std::string>>(
"GradList", "type list of string",
"grad->param name mapping to find which param to optimize.")
.SetDefault({});
AddAttr<int>("Trainers", "type int",
"Number of trainers in the current cluster job")
.SetDefault(1);
}
};
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/framework/lod_rank_table.h>
#include "paddle/framework/op_registry.h"
#include "paddle/operators/detail/safe_ref.h"
namespace paddle {
namespace operators {
class ReorderLoDTensorByRankTableOpProtoMaker
: public framework::OpProtoAndCheckerMaker {
public:
ReorderLoDTensorByRankTableOpProtoMaker(OpProto *proto,
OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(LoDTensor) the input lod tensor need to be reordered.");
AddInput("RankTable",
"(LoDRankTable) the rank table that input need follow");
AddOutput("Out", "(LoDTensor) reordered lod tensor");
AddComment(R"DOC(ReorderLoDTensorByRankTable
Reorder the input X by the rank of `RankTable`. If `RankTable` is ordered by
index [3, 0, 2, 1]. Input X will reorder its sequence, the third sequence of
X will be the first sequence of Output.
NOTE: The RankTable does not need to be calculated by X.
For example:
The X = [Seq0, Seq1, Seq2, Seq3]. The indices of RankTable are [3, 0, 2, 1].
The Out = [Seq3, Seq0, Seq2, Seq1] with correct LoD information.
)DOC");
}
};
class ReorderLoDTensorByRankTableBase : public framework::OperatorBase {
public:
ReorderLoDTensorByRankTableBase(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
void Run(const framework::Scope &scope,
const platform::DeviceContext &dev_ctx) const override {
auto &x =
detail::Ref(scope.FindVar(Input("X")),
"Cannot find input lod tensor variable %s", Input("X"))
.Get<framework::LoDTensor>();
auto &rank_table = detail::Ref(scope.FindVar(Input("RankTable")),
"Cannot find input rank table variable %s",
Input("RankTable"))
.Get<framework::LoDRankTable>();
auto &out =
*detail::Ref(scope.FindVar(Output("Out")),
"Cannot find output lod tensor variable %s", Output("Out"))
.GetMutable<framework::LoDTensor>();
out.Resize(x.dims());
out.mutable_data(x.place(), x.type());
this->process(dev_ctx, x, rank_table, &out);
}
protected:
virtual void process(const platform::DeviceContext &dev_ctx,
const framework::LoDTensor &x,
const framework::LoDRankTable &rank_table,
framework::LoDTensor *out) const = 0;
struct AbsoluteRankTableItem {
size_t offset; // the absolute/accumulated offset.
size_t length; // the length
framework::LoD lod;
};
std::vector<AbsoluteRankTableItem> GetAbsoluteOffsetAndLengthByLoDRankTable(
const framework::LoDTensor &x) const {
std::vector<AbsoluteRankTableItem> absolute_table;
size_t level = 0;
size_t size = x.lod()[level].size();
for (size_t i = 0; i < size - 1; ++i) {
auto lod_offset =
framework::GetSubLoDAndAbsoluteOffset(x.lod(), i, i + 1, level);
auto &offset = lod_offset.second;
absolute_table.emplace_back();
absolute_table.back().length = offset.second - offset.first;
absolute_table.back().offset = offset.first;
absolute_table.back().lod = lod_offset.first;
}
return absolute_table;
}
size_t CopyTensorAndLod(const platform::DeviceContext &dev_ctx,
const AbsoluteRankTableItem &item,
const framework::LoDTensor &x,
framework::LoDTensor *out, size_t out_offset) const {
auto &out_lod = *out->mutable_lod();
auto len = item.length;
auto x_offset = item.offset;
if (out_lod.empty()) {
for (size_t i = 0; i < item.lod.size(); ++i) {
out_lod.push_back(std::vector<size_t>({0}));
}
}
for (size_t i = 0; i < out_lod.size(); ++i) {
auto &out_v = out_lod[i];
auto &new_lod_v = item.lod[i];
for (auto &detail : new_lod_v) {
out_v.push_back(out_v.back() + detail);
}
}
auto x_sliced = x.Slice(x_offset, x_offset + len);
auto out_sliced = out->Slice(out_offset, out_offset + len);
framework::CopyFrom(x_sliced, out_sliced.place(), dev_ctx, &out_sliced);
out_offset += len;
return out_offset;
}
};
class ReorderLoDTensorByRankTableOp : public ReorderLoDTensorByRankTableBase {
public:
ReorderLoDTensorByRankTableOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: ReorderLoDTensorByRankTableBase(type, inputs, outputs, attrs) {}
protected:
void process(const platform::DeviceContext &dev_ctx,
const framework::LoDTensor &x,
const framework::LoDRankTable &rank_table,
framework::LoDTensor *out) const override {
auto absolute_table = GetAbsoluteOffsetAndLengthByLoDRankTable(x);
size_t out_offset = 0;
out->mutable_lod()->clear();
for (auto &item : rank_table.items()) {
PADDLE_ENFORCE_LT(item.index, absolute_table.size());
out_offset = CopyTensorAndLod(dev_ctx, absolute_table[item.index], x, out,
out_offset);
}
}
};
class IdentityInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext *context) const override {
context->SetOutputDim("Out", context->GetInputDim("X"));
}
};
class ReorderLodTensorByRankGradOpMaker
: public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDesc();
grad_op->SetType("reorder_lod_tensor_by_rank_grad");
grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetInput("RankTable", Input("RankTable"));
return std::unique_ptr<framework::OpDesc>(grad_op);
}
};
class ReorderLoDTensorByRankGradOp : public ReorderLoDTensorByRankTableBase {
public:
ReorderLoDTensorByRankGradOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: ReorderLoDTensorByRankTableBase(type, inputs, outputs, attrs) {}
protected:
void process(const platform::DeviceContext &dev_ctx,
const framework::LoDTensor &x,
const framework::LoDRankTable &rank_table,
framework::LoDTensor *out) const override {
auto absolute_table = GetAbsoluteOffsetAndLengthByLoDRankTable(x);
// offsets = enumerate([item.index for item in rank_table.items()])
std::vector<std::pair<size_t, size_t>> offsets;
offsets.reserve(rank_table.items().size());
for (size_t i = 0; i < rank_table.items().size(); ++i) {
offsets.push_back({i, rank_table.items()[i].index});
}
// offsets.sort(key=lambda x: x[1])
std::sort(
offsets.begin(), offsets.end(),
[](const std::pair<size_t, size_t> &a,
const std::pair<size_t, size_t> &b) { return a.second < b.second; });
// Copy TensorAndLod
size_t out_offset = 0;
for (auto &offset : offsets) {
out_offset = this->CopyTensorAndLod(dev_ctx, absolute_table[offset.first],
x, out, out_offset);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(reorder_lod_tensor_by_rank,
ops::ReorderLoDTensorByRankTableOp,
ops::ReorderLodTensorByRankGradOpMaker,
ops::ReorderLoDTensorByRankTableOpProtoMaker,
ops::IdentityInferShape);
REGISTER_OPERATOR(reorder_lod_tensor_by_rank_grad,
ops::ReorderLoDTensorByRankGradOp, ops::IdentityInferShape);
......@@ -58,13 +58,13 @@ class ScaleGradMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto *grad_op = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDesc();
grad_op->SetType("scale");
grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttr("scale", GetAttr("scale"));
return std::unique_ptr<framework::OpDescBind>(grad_op);
return std::unique_ptr<framework::OpDesc>(grad_op);
}
};
......
......@@ -34,45 +34,56 @@ class SendOp : public framework::OperatorBase {
const framework::AttributeMap &attrs)
: OperatorBase(type, inputs, outputs, attrs) {
// init client when the operator is created at runtime.
if (!client_) {
std::string endpoint = Attr<std::string>("endpoint");
client_.reset(new detail::RPCClient(
grpc::CreateChannel(endpoint, grpc::InsecureChannelCredentials())));
// TODO(typhoonzero): how to call InitVariables
std::vector<std::string> endpoints =
Attr<std::vector<std::string>>("endpoints");
for (auto ep : endpoints) {
client_map_[ep].reset(new detail::RPCClient(
grpc::CreateChannel(ep, grpc::InsecureChannelCredentials())));
}
}
void Run(const framework::Scope &scope,
const platform::DeviceContext &dev_ctx) const override {
auto iname = Input("X");
auto oname = Output("Out");
// TODO(typhoonzero): currently it's non-blocking,
// should block until server responds.
bool ret = client_->SendVariable(scope, iname, oname);
auto ins = Inputs("X");
std::vector<std::string> epmap = Attr<std::vector<std::string>>("epmap");
// TODO(typhoonzero): use async calls to send multiple variable asyncly.
for (size_t i = 0; i < ins.size(); ++i) {
bool ret = client_map_[epmap[i]]->SendVariable(scope, ins[i]);
if (!ret) {
LOG(ERROR) << "send variable error";
LOG(ERROR) << "send variable error: " << ins[i];
}
}
// TODO(typhoonzero): support async optimization
client_map_[epmap[0]]->Wait();
for (size_t i = 0; i < ins.size(); ++i) {
bool ret = client_map_[epmap[i]]->GetVariable(scope, ins[i]);
if (!ret) {
LOG(ERROR) << "GetVariable error: " << ins[i];
}
}
}
protected:
std::shared_ptr<detail::RPCClient> client_{nullptr};
mutable std::unordered_map<std::string, std::shared_ptr<detail::RPCClient>>
client_map_;
};
class SendOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SendOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor) Input tensor to be saved");
AddOutput("Out", "(Tensor) Output fetched from server");
AddInput("X", "(Tensor) Input tensor to be send").AsDuplicable();
AddComment(R"DOC(
Recv operator
This operator will recv tensor from send_op
)DOC");
AddAttr<std::string>("endpoint",
"(string, default 127.0.0.1:6164)"
"IP address to listen on.")
.SetDefault("127.0.0.1:6164")
.AddCustomChecker([](const std::string &ip) { return !ip.empty(); });
AddAttr<std::vector<std::string>>("endpoints",
"(string vector, default 127.0.0.1:6164)"
"Server endpoints to send variables to.");
AddAttr<std::vector<std::string>>("epmap",
"(string vector, default 127.0.0.1:6164)"
"Server endpoints in the order of input "
"variables for mapping");
}
};
......
......@@ -16,12 +16,14 @@
// a RemoteOptimizer.
#include <unistd.h>
#include <string>
#include <thread>
#include "gtest/gtest.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/program_desc.h"
#include "paddle/string/printf.h"
USE_NO_KERNEL_OP(send);
USE_NO_KERNEL_OP(recv);
......@@ -33,30 +35,33 @@ std::unique_ptr<paddle::framework::OperatorBase> recv_op;
void InitTensorsInScope(paddle::framework::Scope &scope,
paddle::platform::CPUPlace &place) {
paddle::platform::CPUDeviceContext ctx(place);
auto var = scope.Var("X");
for (int i = 0; i < 2; ++i) {
auto var_name = paddle::string::Sprintf("x%d", i);
auto var = scope.Var(var_name);
auto tensor = var->GetMutable<paddle::framework::LoDTensor>();
tensor->Resize({10, 10});
float *expect = tensor->mutable_data<float>(place);
for (int64_t i = 0; i < tensor->numel(); ++i) {
expect[i] = static_cast<float>(i);
}
}
auto out_var = scope.Var("Out");
auto out_tensor = out_var->GetMutable<paddle::framework::LoDTensor>();
out_tensor->Resize({10, 10});
tensor->mutable_data<float>(place); // allocate
out_tensor->mutable_data<float>(place); // allocate
}
void AddOp(const std::string &type,
const paddle::framework::VariableNameMap &inputs,
const paddle::framework::VariableNameMap &outputs,
paddle::framework::AttributeMap attrs,
paddle::framework::BlockDescBind *block) {
paddle::framework::BlockDesc *block) {
// insert output
for (auto kv : outputs) {
for (auto v : kv.second) {
auto var = block->Var(v);
var->SetDataType(paddle::framework::DataType::FP32);
var->SetDataType(paddle::framework::proto::DataType::FP32);
}
}
......@@ -78,10 +83,10 @@ void StartServerNet() {
InitTensorsInScope(scope, place);
// sub program run in recv_op, for simple test we use sum
paddle::framework::ProgramDescBind program;
paddle::framework::BlockDescBind *block = program.MutableBlock(0);
paddle::framework::ProgramDesc program;
paddle::framework::BlockDesc *block = program.MutableBlock(0);
// X for server side tensors, RX for received tensers, must be of same shape.
AddOp("sum", {{"X", {"X", "RX"}}}, {{"Out", {"Out"}}}, {}, block);
AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, block);
paddle::framework::AttributeMap attrs;
attrs.insert({"endpoint", std::string("127.0.0.1:6174")});
......@@ -89,8 +94,8 @@ void StartServerNet() {
PADDLE_ENFORCE(program.Proto()->SerializeToString(&program_proto));
attrs.insert({"OptimizeProgram", program_proto});
recv_op = paddle::framework::OpRegistry::CreateOp("recv", {{"RX", {"RX"}}},
{{"Out", {"Out"}}}, attrs);
recv_op = paddle::framework::OpRegistry::CreateOp(
"recv", {{"RX", {"x0", "x1"}}}, {{"Out", {"Out"}}}, attrs);
paddle::platform::CPUDeviceContext ctx(place);
recv_op->Run(scope, ctx);
}
......@@ -107,11 +112,11 @@ TEST(SendRecvOp, CPU) {
attrs.insert({"endpoint", std::string("127.0.0.1:6174")});
auto send_op = paddle::framework::OpRegistry::CreateOp(
"send", {{"X", {"X"}}}, {{"Out", {"Out"}}}, attrs);
"send", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, attrs);
paddle::platform::CPUDeviceContext ctx(place);
send_op->Run(scope, ctx);
auto in_var = scope.Var("X");
auto in_var = scope.Var("x0");
auto tensor = in_var->GetMutable<paddle::framework::LoDTensor>();
float *expected = tensor->data<float>();
......
......@@ -124,8 +124,9 @@ class SequenceConcatGradOp : public framework::OperatorWithKernel {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(sequence_concat, ops::SequenceConcatOp, ops::SequenceConcatOpMaker,
sequence_concat_grad, ops::SequenceConcatGradOp);
REGISTER_OP_EX(sequence_concat, ops::SequenceConcatOp,
ops::SequenceConcatOpMaker, sequence_concat_grad,
ops::SequenceConcatGradOp, false);
REGISTER_OP_CPU_KERNEL(
sequence_concat,
ops::SequenceConcatOpKernel<paddle::platform::CPUDeviceContext, float>);
......
......@@ -50,10 +50,14 @@ input Tensor can be either [N, 1] or [N], where N is the sum of the length
of all sequences.
The algorithm works as follows:
for i-th sequence in a mini-batch:
$$Out(X[lod[i]:lod[i+1]], :) =
\frac{\exp(X[lod[i]:lod[i+1], :])}
{\sum(\exp(X[lod[i]:lod[i+1], :]))}$$
$$
Out(X[lod[i]:lod[i+1]], :) = \
\frac{\exp(X[lod[i]:lod[i+1], :])} \
{\sum(\exp(X[lod[i]:lod[i+1], :]))}
$$
For example, for a mini-batch of 3 sequences with variable-length,
each containing 2, 3, 2 time-steps, the lod of which is [0, 2, 5, 7],
......
......@@ -136,14 +136,14 @@ class ShrinkRNNGradOpMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto *op = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto *op = new framework::OpDesc();
op->SetType("shrink_rnn_memory_grad");
op->SetInput("X", Input("X"));
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(op);
return std::unique_ptr<framework::OpDesc>(op);
}
};
......
......@@ -50,13 +50,13 @@ class SignGradMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto *grad_op = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDesc();
grad_op->SetType("scale");
grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttr("scale", 0.0f);
return std::unique_ptr<framework::OpDescBind>(grad_op);
return std::unique_ptr<framework::OpDesc>(grad_op);
}
};
......
......@@ -173,8 +173,8 @@ class SoftmaxGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto* grad_op = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto* grad_op = new framework::OpDesc();
grad_op->SetType("softmax_with_cross_entropy_grad");
grad_op->SetInput("Label", Input("Label"));
grad_op->SetInput("Softmax", Output("Softmax"));
......@@ -183,7 +183,7 @@ class SoftmaxGradMaker : public framework::SingleGradOpDescMaker {
grad_op->SetInput(framework::GradVarName("Loss"), OutputGrad("Loss"));
grad_op->SetOutput(framework::GradVarName("Logits"), InputGrad("Logits"));
grad_op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(grad_op);
return std::unique_ptr<framework::OpDesc>(grad_op);
}
};
......
......@@ -163,8 +163,8 @@ class SplitLoDTensorArrayGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto *grad_op = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDesc();
grad_op->SetType("merge_lod_tensor");
grad_op->SetInput("InTrue", OutputGrad("OutTrue"));
grad_op->SetInput("InFalse", OutputGrad("OutFalse"));
......@@ -172,7 +172,7 @@ class SplitLoDTensorArrayGradMaker : public framework::SingleGradOpDescMaker {
grad_op->SetInput("X", Input("X"));
grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(grad_op);
return std::unique_ptr<framework::OpDesc>(grad_op);
}
};
......
......@@ -108,13 +108,13 @@ class SplitGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto op = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto op = new framework::OpDesc();
op->SetType("concat");
op->SetInput("X", OutputGrad("Out"));
op->SetOutput("Out", InputGrad("X"));
op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(op);
return std::unique_ptr<framework::OpDesc>(op);
}
};
......
......@@ -85,8 +85,10 @@ TEST(StridedMemcpy, GPUCrop) {
platform::GPUPlace gpu0(0);
platform::CPUPlace cpu;
platform::CUDADeviceContext ctx(gpu0);
int* gpu_src = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(src)));
memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src));
memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src), ctx.stream());
framework::DDim src_stride({5, 1});
......@@ -96,7 +98,6 @@ TEST(StridedMemcpy, GPUCrop) {
framework::DDim dst_dim({2, 2});
framework::DDim dst_stride({2, 1});
platform::CUDADeviceContext ctx(gpu0);
StridedMemcpy<int>(ctx, gpu_src + 1, src_stride, dst_dim, dst_stride,
gpu_dst);
......@@ -122,9 +123,10 @@ TEST(StridedMemcpy, GPUConcat) {
platform::GPUPlace gpu0(0);
platform::CPUPlace cpu;
platform::CUDADeviceContext ctx(gpu0);
int* gpu_src = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(src)));
memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src));
memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src), ctx.stream());
int dst[8];
int* gpu_dst = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(dst)));
......@@ -132,7 +134,6 @@ TEST(StridedMemcpy, GPUConcat) {
framework::DDim src_stride({2, 1});
framework::DDim dst_dim({2, 2});
framework::DDim dst_stride({4, 1});
platform::CUDADeviceContext ctx(gpu0);
StridedMemcpy<int>(ctx, gpu_src, src_stride, dst_dim, dst_stride, gpu_dst);
StridedMemcpy<int>(ctx, gpu_src, src_stride, dst_dim, dst_stride,
......
......@@ -115,8 +115,8 @@ the LoD information with the first input.
class SumOpVarTypeInference : public framework::VarTypeInference {
public:
void operator()(const framework::OpDescBind& op_desc,
framework::BlockDescBind* block) const override {
void operator()(const framework::OpDesc& op_desc,
framework::BlockDesc* block) const override {
auto& inputs = op_desc.Input("X");
auto var_type = framework::proto::VarDesc::SELECTED_ROWS;
......@@ -169,20 +169,19 @@ class SumGradMaker : public framework::GradOpDescMakerBase {
public:
using framework::GradOpDescMakerBase::GradOpDescMakerBase;
std::vector<std::unique_ptr<framework::OpDescBind>> operator()()
const override {
auto x_grads = InputGrad("X");
std::vector<std::unique_ptr<framework::OpDescBind>> grad_ops;
std::vector<std::unique_ptr<framework::OpDesc>> operator()() const override {
auto x_grads = InputGrad("X", false);
std::vector<std::unique_ptr<framework::OpDesc>> grad_ops;
grad_ops.reserve(x_grads.size());
auto og = OutputGrad("Out");
std::transform(x_grads.begin(), x_grads.end(), std::back_inserter(grad_ops),
[&og](const std::string& x_grad) {
auto* grad_op = new framework::OpDescBind();
auto* grad_op = new framework::OpDesc();
grad_op->SetType("scale");
grad_op->SetInput("X", og);
grad_op->SetOutput("Out", {x_grad});
grad_op->SetAttr("scale", 1.0f);
return std::unique_ptr<framework::OpDescBind>(grad_op);
return std::unique_ptr<framework::OpDesc>(grad_op);
});
return grad_ops;
}
......
......@@ -96,8 +96,8 @@ class WriteToArrayInferShape : public framework::InferShapeBase {
class WriteToArrayInferVarType : public framework::VarTypeInference {
public:
void operator()(const framework::OpDescBind &op_desc,
framework::BlockDescBind *block) const override {
void operator()(const framework::OpDesc &op_desc,
framework::BlockDesc *block) const override {
auto x_name = op_desc.Input("X")[0];
auto out_name = op_desc.Output("Out")[0];
VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY";
......@@ -175,14 +175,14 @@ class WriteToArrayGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto *grad_op = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDesc();
grad_op->SetType("read_from_array");
grad_op->SetInput("I", Input("I"));
grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(grad_op);
return std::unique_ptr<framework::OpDesc>(grad_op);
}
};
......@@ -191,14 +191,14 @@ class ReadFromArrayGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto *grad_op = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDesc();
grad_op->SetType("write_to_array");
grad_op->SetInput("I", Input("I"));
grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(grad_op);
return std::unique_ptr<framework::OpDesc>(grad_op);
}
};
......
......@@ -46,7 +46,7 @@ class WhileOp : public framework::OperatorBase {
PADDLE_ENFORCE_EQ(cond.dims(), paddle::framework::make_ddim({1}));
framework::Executor executor(dev_ctx);
auto *block = Attr<framework::BlockDescBind *>(kStepBlock);
auto *block = Attr<framework::BlockDesc *>(kStepBlock);
auto *program = block->Program();
auto step_scopes =
......@@ -82,7 +82,7 @@ class WhileOpMaker : public framework::OpProtoAndCheckerMaker {
"(StepScopeVar) A vector of local scope, which size equals the "
"step number of While Op. The i'th scope storages temporary "
"variables generated in the i'th step.");
AddAttr<framework::BlockDescBind *>(kStepBlock,
AddAttr<framework::BlockDesc *>(kStepBlock,
"The step block inside WhileOp");
AddComment(R"DOC(
)DOC");
......@@ -99,7 +99,7 @@ class WhileGradOp : public framework::OperatorBase {
void Run(const framework::Scope &scope,
const platform::DeviceContext &dev_ctx) const override {
framework::Executor executor(dev_ctx);
auto *block = Attr<framework::BlockDescBind *>(kStepBlock);
auto *block = Attr<framework::BlockDesc *>(kStepBlock);
auto *program = block->Program();
auto *step_scopes =
......@@ -209,8 +209,8 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto *grad = new framework::OpDescBind();
std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad = new framework::OpDesc();
grad->SetType("while_grad");
grad->SetInput(kParameters, Input(kParameters));
......@@ -279,14 +279,14 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
// while operator could be renamed.
grad->SetAttr("original_output_grad", extra_inputs_list);
return std::unique_ptr<framework::OpDescBind>(grad);
return std::unique_ptr<framework::OpDesc>(grad);
}
};
class WhileGradOpVarTypeInference : public framework::VarTypeInference {
public:
void operator()(const framework::OpDescBind &op_desc,
framework::BlockDescBind *block) const override {
void operator()(const framework::OpDesc &op_desc,
framework::BlockDesc *block) const override {
auto p_names = op_desc.Input(kParameters);
auto pg_names = op_desc.Output(framework::GradVarName(kParameters));
......
......@@ -19,7 +19,7 @@ CPUDeviceContext::CPUDeviceContext() {
eigen_device_.reset(new Eigen::DefaultDevice());
}
CPUDeviceContext::CPUDeviceContext(CPUPlace place) {
CPUDeviceContext::CPUDeviceContext(CPUPlace place) : place_(place) {
eigen_device_.reset(new Eigen::DefaultDevice());
}
......@@ -27,7 +27,7 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
return eigen_device_.get();
}
Place CPUDeviceContext::GetPlace() const { return CPUPlace(); }
Place CPUDeviceContext::GetPlace() const { return place_; }
#ifdef PADDLE_WITH_CUDA
......
......@@ -45,6 +45,7 @@ class CPUDeviceContext : public DeviceContext {
Place GetPlace() const override;
private:
CPUPlace place_;
std::unique_ptr<Eigen::DefaultDevice> eigen_device_;
};
......
......@@ -97,17 +97,6 @@ void GpuMemcpyAsync(void *dst, const void *src, size_t count,
"cudaMemcpyAsync failed in paddle::platform::GpuMemcpyAsync");
}
void GpuMemcpySync(void *dst, const void *src, size_t count,
enum cudaMemcpyKind kind) {
PADDLE_ENFORCE(cudaMemcpy(dst, src, count, kind),
"cudaMemcpy failed in paddle::platform::GpuMemcpySync");
// note: cudaMemcpy may actually be asynchronous with respect to the caller,
// block on stream 0 to make sure the copy has completed
PADDLE_ENFORCE(
cudaStreamSynchronize(0),
"cudaStreamSynchronize failed in paddle::platform::GpuMemcpySync");
}
void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device,
size_t count, cudaStream_t stream) {
PADDLE_ENFORCE(
......
......@@ -52,10 +52,6 @@ size_t GpuMaxChunkSize();
void GpuMemcpyAsync(void *dst, const void *src, size_t count,
enum cudaMemcpyKind kind, cudaStream_t stream);
//! Copy memory from address src to dst synchronously.
void GpuMemcpySync(void *dst, const void *src, size_t count,
enum cudaMemcpyKind kind);
//! Copy memory from one device to another device.
void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device,
size_t count, cudaStream_t stream);
......
......@@ -53,11 +53,11 @@ TEST(Transform, GPUUnary) {
CUDADeviceContext ctx(gpu0);
float cpu_buf[4] = {0.1, 0.2, 0.3, 0.4};
float* gpu_buf = static_cast<float*>(Alloc(gpu0, sizeof(float) * 4));
Copy(gpu0, gpu_buf, CPUPlace(), cpu_buf, sizeof(cpu_buf));
Copy(gpu0, gpu_buf, CPUPlace(), cpu_buf, sizeof(cpu_buf), ctx.stream());
Transform<paddle::platform::CUDADeviceContext> trans;
trans(ctx, gpu_buf, gpu_buf + 4, gpu_buf, Scale<float>(10));
ctx.Wait();
Copy(CPUPlace(), cpu_buf, gpu0, gpu_buf, sizeof(cpu_buf));
Copy(CPUPlace(), cpu_buf, gpu0, gpu_buf, sizeof(cpu_buf), ctx.stream());
Free(gpu0, gpu_buf);
for (int i = 0; i < 4; ++i) {
ASSERT_NEAR(cpu_buf[i], static_cast<float>(i + 1), 1e-5);
......@@ -83,11 +83,11 @@ TEST(Transform, GPUBinary) {
GPUPlace gpu0(0);
CUDADeviceContext ctx(gpu0);
int* gpu_buf = static_cast<int*>(Alloc(gpu0, sizeof(buf)));
Copy(gpu0, gpu_buf, CPUPlace(), buf, sizeof(buf));
Copy(gpu0, gpu_buf, CPUPlace(), buf, sizeof(buf), ctx.stream());
Transform<paddle::platform::CUDADeviceContext> trans;
trans(ctx, gpu_buf, gpu_buf + 4, gpu_buf, gpu_buf, Multiply<int>());
ctx.Wait();
Copy(CPUPlace(), buf, gpu0, gpu_buf, sizeof(buf));
Copy(CPUPlace(), buf, gpu0, gpu_buf, sizeof(buf), ctx.stream());
Free(gpu0, gpu_buf);
for (int i = 0; i < 4; ++i) {
ASSERT_EQ((i + 1) * (i + 1), buf[i]);
......
......@@ -108,21 +108,21 @@ static py::bytes SerializeMessage(T &self) {
// Bind Methods
void BindProgramDesc(py::module &m) {
py::class_<ProgramDescBind>(m, "ProgramDesc", "")
py::class_<ProgramDesc>(m, "ProgramDesc", "")
.def(py::init<>())
.def("__init__",
[](ProgramDescBind &self, const ProgramDescBind &other) {
new (&self) ProgramDescBind(other);
[](ProgramDesc &self, const ProgramDesc &other) {
new (&self) ProgramDesc(other);
})
.def("__init__",
[](ProgramDescBind &self, const py::bytes &binary_str) {
[](ProgramDesc &self, const py::bytes &binary_str) {
std::string str(binary_str);
new (&self) ProgramDescBind(str);
new (&self) ProgramDesc(str);
})
.def("append_block", &ProgramDescBind::AppendBlock,
.def("append_block", &ProgramDesc::AppendBlock,
py::return_value_policy::reference)
.def("append_backward",
[](ProgramDescBind &program_desc, const VarDescBind &target,
[](ProgramDesc &program_desc, const VarDesc &target,
const std::unordered_set<std::string> &no_grad_vars) {
ParamGradInfoMap param_grad_map =
AppendBackward(program_desc, target, no_grad_vars);
......@@ -138,12 +138,12 @@ void BindProgramDesc(py::module &m) {
}
return retv;
})
.def("block", &ProgramDescBind::MutableBlock,
.def("block", &ProgramDesc::MutableBlock,
py::return_value_policy::reference)
.def("num_blocks", &ProgramDescBind::Size)
.def("serialize_to_string", SerializeMessage<ProgramDescBind>)
.def("num_blocks", &ProgramDesc::Size)
.def("serialize_to_string", SerializeMessage<ProgramDesc>)
.def("parse_from_string",
[](ProgramDescBind &program_desc, const std::string &data) {
[](ProgramDesc &program_desc, const std::string &data) {
proto::ProgramDesc *desc = program_desc.Proto();
PADDLE_ENFORCE(desc->ParseFromString(data),
"Fail to parse ProgramDesc from string. This could "
......@@ -152,35 +152,35 @@ void BindProgramDesc(py::module &m) {
}
void BindBlockDesc(py::module &m) {
py::class_<BlockDescBind>(m, "BlockDesc", "")
.def_property_readonly("id", &BlockDescBind::ID)
.def_property_readonly("parent", &BlockDescBind::Parent)
.def("append_op", &BlockDescBind::AppendOp,
py::class_<BlockDesc>(m, "BlockDesc", "")
.def_property_readonly("id", &BlockDesc::ID)
.def_property_readonly("parent", &BlockDesc::Parent)
.def("append_op", &BlockDesc::AppendOp,
py::return_value_policy::reference)
.def("prepend_op", &BlockDescBind::PrependOp,
.def("prepend_op", &BlockDesc::PrependOp,
py::return_value_policy::reference)
.def("remove_op", &BlockDesc::RemoveOp)
.def("var",
[](BlockDescBind &self, py::bytes byte_name) {
[](BlockDesc &self, py::bytes byte_name) {
std::string name = byte_name;
return self.Var(name);
},
py::return_value_policy::reference)
.def("has_var",
[](BlockDescBind &self, py::bytes byte_name) {
[](BlockDesc &self, py::bytes byte_name) {
std::string name = byte_name;
return self.HasVar(name);
})
.def("find_var",
[](BlockDescBind &self, py::bytes byte_name) {
[](BlockDesc &self, py::bytes byte_name) {
std::string name = byte_name;
return self.FindVar(name);
},
py::return_value_policy::reference)
.def("all_vars", &BlockDescBind::AllVars,
py::return_value_policy::reference)
.def("op_size", &BlockDescBind::OpSize)
.def("op", &BlockDescBind::Op, py::return_value_policy::reference)
.def("serialize_to_string", SerializeMessage<BlockDescBind>);
.def("all_vars", &BlockDesc::AllVars, py::return_value_policy::reference)
.def("op_size", &BlockDesc::OpSize)
.def("op", &BlockDesc::Op, py::return_value_policy::reference)
.def("serialize_to_string", SerializeMessage<BlockDesc>);
}
void BindVarDsec(py::module &m) {
......@@ -193,25 +193,25 @@ void BindVarDsec(py::module &m) {
.value("FP32", proto::DataType::FP32)
.value("FP64", proto::DataType::FP64);
py::class_<VarDescBind> var_desc(m, "VarDesc", "");
py::class_<VarDesc> var_desc(m, "VarDesc", "");
var_desc
.def("name",
[](const VarDescBind &self) {
[](const VarDesc &self) {
py::bytes name = self.Name();
return name;
},
py::return_value_policy::reference)
.def("set_shape", &VarDescBind::SetShape)
.def("set_dtype", &VarDescBind::SetDataType)
.def("shape", &VarDescBind::Shape, py::return_value_policy::reference)
.def("dtype", &VarDescBind::GetDataType)
.def("lod_level", &VarDescBind::GetLodLevel)
.def("set_lod_level", &VarDescBind::SetLoDLevel)
.def("type", &VarDescBind::GetType)
.def("set_type", &VarDescBind::SetType)
.def("serialize_to_string", SerializeMessage<VarDescBind>)
.def("persistable", &VarDescBind::Persistable)
.def("set_persistable", &VarDescBind::SetPersistable);
.def("set_shape", &VarDesc::SetShape)
.def("set_dtype", &VarDesc::SetDataType)
.def("shape", &VarDesc::Shape, py::return_value_policy::reference)
.def("dtype", &VarDesc::GetDataType)
.def("lod_level", &VarDesc::GetLodLevel)
.def("set_lod_level", &VarDesc::SetLoDLevel)
.def("type", &VarDesc::GetType)
.def("set_type", &VarDesc::SetType)
.def("serialize_to_string", SerializeMessage<VarDesc>)
.def("persistable", &VarDesc::Persistable)
.def("set_persistable", &VarDesc::SetPersistable);
py::enum_<proto::VarDesc::VarType>(var_desc, "VarType", "")
.value("LOD_TENSOR", proto::VarDesc::LOD_TENSOR)
......@@ -235,26 +235,32 @@ void BindOpDesc(py::module &m) {
.value("BOOLS", proto::AttrType::BOOLEANS)
.value("BLOCK", proto::AttrType::BLOCK);
py::class_<OpDescBind> op_desc(m, "OpDesc", "");
op_desc.def("type", &OpDescBind::Type)
.def("set_type", &OpDescBind::SetType)
.def("input", &OpDescBind::Input)
.def("input_names", &OpDescBind::InputNames)
.def("set_input", &OpDescBind::SetInput)
.def("output", &OpDescBind::Output)
.def("output_names", &OpDescBind::OutputNames)
.def("set_output", &OpDescBind::SetOutput)
.def("has_attr", &OpDescBind::HasAttr)
.def("attr_type", &OpDescBind::GetAttrType)
.def("attr_names", &OpDescBind::AttrNames)
.def("set_attr", &OpDescBind::SetAttr)
.def("attr", &OpDescBind::GetAttr)
.def("set_block_attr", &OpDescBind::SetBlockAttr)
.def("block_attr", &OpDescBind::GetBlockAttr)
.def("check_attrs", &OpDescBind::CheckAttrs)
.def("infer_shape", &OpDescBind::InferShape)
.def("infer_var_type", &OpDescBind::InferVarType)
.def("serialize_to_string", SerializeMessage<OpDescBind>);
py::class_<OpDesc> op_desc(m, "OpDesc", "");
op_desc.def("type", &OpDesc::Type)
.def("set_type", &OpDesc::SetType)
.def("input", &OpDesc::Input)
.def("input_names", &OpDesc::InputNames)
.def("set_input", &OpDesc::SetInput)
.def("output", &OpDesc::Output)
.def("output_names", &OpDesc::OutputNames)
.def("set_output", &OpDesc::SetOutput)
.def("has_attr", &OpDesc::HasAttr)
.def("attr_type", &OpDesc::GetAttrType)
.def("attr_names", &OpDesc::AttrNames)
.def("set_attr", &OpDesc::SetAttr)
.def("attr", &OpDesc::GetAttr)
.def("set_block_attr", &OpDesc::SetBlockAttr)
.def("set_serialized_attr",
[](OpDesc &self, const std::string &name,
const py::bytes &seriralized) {
std::string ser(seriralized);
self.SetAttr(name, ser);
})
.def("block_attr", &OpDesc::GetBlockAttr)
.def("check_attrs", &OpDesc::CheckAttrs)
.def("infer_shape", &OpDesc::InferShape)
.def("infer_var_type", &OpDesc::InferVarType)
.def("serialize_to_string", SerializeMessage<OpDesc>);
}
} // namespace pybind
......
......@@ -266,36 +266,36 @@ All parameter, weight, gradient are variables in Paddle.
return ret_values;
});
m.def("get_grad_op_descs",
[](const OpDescBind &op_desc,
[](const OpDesc &op_desc,
const std::unordered_set<std::string> &no_grad_set,
std::unordered_map<std::string, std::string> &grad_to_var,
const std::vector<BlockDescBind *> &grad_sub_block) {
std::vector<std::unique_ptr<OpDescBind>> grad_op_descs =
const std::vector<BlockDesc *> &grad_sub_block) {
std::vector<std::unique_ptr<OpDesc>> grad_op_descs =
framework::OpInfoMap::Instance()
.Get(op_desc.Type())
.GradOpMaker()(op_desc, no_grad_set, &grad_to_var,
grad_sub_block);
std::vector<OpDescBind *> grad_op_desc_ptrs(grad_op_descs.size());
std::vector<OpDesc *> grad_op_desc_ptrs(grad_op_descs.size());
std::transform(
grad_op_descs.begin(), grad_op_descs.end(),
grad_op_desc_ptrs.begin(),
[](std::unique_ptr<OpDescBind> &p) { return p.release(); });
[](std::unique_ptr<OpDesc> &p) { return p.release(); });
return grad_op_desc_ptrs;
});
m.def("prune", [](const ProgramDescBind &origin,
m.def("prune", [](const ProgramDesc &origin,
const std::vector<std::array<size_t, 2>> &targets) {
ProgramDescBind prog_with_targets(origin);
ProgramDesc prog_with_targets(origin);
for (const auto &t : targets) {
prog_with_targets.MutableBlock(t[0])->Op(t[1])->MarkAsTarget();
}
proto::ProgramDesc pruned_desc;
Prune(*prog_with_targets.Proto(), &pruned_desc);
return new ProgramDescBind(pruned_desc);
return new ProgramDesc(pruned_desc);
});
m.def("inference_optimize", [](ProgramDescBind &origin) {
m.def("inference_optimize", [](ProgramDesc &origin) {
proto::ProgramDesc pruned_desc;
InferenceOptimize(*(origin.Proto()), &pruned_desc);
return new ProgramDescBind(pruned_desc);
return new ProgramDesc(pruned_desc);
});
m.def_submodule(
"var_names",
......
......@@ -14,6 +14,7 @@
#pragma once
#include <string>
#include "paddle/framework/executor.h"
#include "paddle/framework/tensor.h"
#include "paddle/memory/memcpy.h"
#include "pybind11/numpy.h"
......@@ -61,11 +62,15 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
auto *src_ptr = static_cast<const void *>(tensor.data<CUR_TYPE>());
auto *dst_ptr = static_cast<void *>(dst_tensor.mutable_data<CUR_TYPE>(
tensor.dims(), platform::CPUPlace()));
// TODO(qijun): Here we use default CUDA stream to set GPU Tensor to
// a Python numpy array. It's better to manage CDUA stream unifiedly.
paddle::platform::GpuMemcpySync(dst_ptr, src_ptr,
sizeof(CUR_TYPE) * tensor.numel(),
cudaMemcpyDeviceToHost);
framework::DeviceContextPool &pool =
framework::DeviceContextPool::Get();
auto dev_ctx = static_cast<const platform::CUDADeviceContext *>(
pool.Borrow(tensor.place()));
paddle::platform::GpuMemcpyAsync(
dst_ptr, src_ptr, sizeof(CUR_TYPE) * tensor.numel(),
cudaMemcpyDeviceToHost, dev_ctx->stream());
#else
PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
#endif
......@@ -132,10 +137,12 @@ void PyCUDATensorSetFromArray(
self.Resize(framework::make_ddim(dims));
auto *dst = self.mutable_data<T>(place);
// TODO(qijun): Here we use default CUDA stream to set a Python numpy
// array to a GPU Tensor. It's better to manage CDUA stream unifiedly.
paddle::platform::GpuMemcpySync(dst, array.data(), sizeof(T) * array.size(),
cudaMemcpyHostToDevice);
framework::DeviceContextPool &pool = framework::DeviceContextPool::Get();
auto dev_ctx =
static_cast<const platform::CUDADeviceContext *>(pool.Borrow(place));
paddle::platform::GpuMemcpyAsync(dst, array.data(), sizeof(T) * array.size(),
cudaMemcpyHostToDevice, dev_ctx->stream());
}
#endif
......
......@@ -16,13 +16,14 @@ import regularizer
from param_attr import ParamAttr
from data_feeder import DataFeeder
from core import LoDTensor, CPUPlace, GPUPlace
from distribute_transpiler import DistributeTranspiler
import clip
Tensor = LoDTensor
__all__ = framework.__all__ + executor.__all__ + [
'io', 'initializer', 'layers', 'nets', 'optimizer', 'backward',
'regularizer', 'LoDTensor', 'CPUPlace', 'GPUPlace', 'Tensor', 'ParamAttr'
'DataFeeder', 'clip'
'DataFeeder', 'clip', 'DistributeTranspiler'
]
......
import framework
from framework import Program, default_main_program, Parameter, Variable
import optimizer
from layer_helper import LayerHelper
def hash_name_to_server(params_grads, pserver_endpoints):
"""
:param param_grads:
:return: a map of pserver endpoint ->
params -> [param list]
grads -> [grad list]
"""
def _hash_param(param_name, total):
return hash(param_name) % total
param_grad_map = dict()
for param, grad in params_grads:
if param.trainable is True and grad is not None:
server_id = _hash_param(param.name, len(pserver_endpoints))
server_for_param = pserver_endpoints[server_id]
if not param_grad_map.has_key(server_for_param):
param_grad_map[server_for_param] = {"params": [], "grads": []}
param_grad_map[server_for_param]["params"].append(param)
param_grad_map[server_for_param]["grads"].append(grad)
return param_grad_map
def round_robin(params_grads, pserver_endpoints):
assert (len(params_grads) > len(pserver_endpoints))
param_grad_map = dict()
pserver_idx = 0
for param, grad in params_grads:
if param.trainable is True:
server_for_param = pserver_endpoints[pserver_idx]
if not param_grad_map.has_key(server_for_param):
param_grad_map[server_for_param] = {"params": [], "grads": []}
param_grad_map[server_for_param]["params"].append(param)
param_grad_map[server_for_param]["grads"].append(grad)
pserver_idx += 1
if pserver_idx >= len(pserver_endpoints):
pserver_idx = 0
return param_grad_map
class DistributeTranspiler:
def transpile(self,
optimize_ops,
params_grads,
program=None,
pservers="127.0.0.1:6174",
trainers=1,
split_method=round_robin):
"""
Transpile the program to a distributed data-parallelism programs.
The main_program will be transform to use a remote parameter server
to do parameter optimization. And the optimization graph will be put
in to a parameter server program.
Use different methods to split trainable varialbles to different
parameter servers.
Example to run:
exe = fluid.Executor(place)
t = fluid.DistributeTranspiler()
t.transpile(optimize_ops, params_grads, pservers="127.0.0.1:6174", trainers=1)
pserver_endpoint = os.getenv("PSERVER")
if pserver_endpoint:
pserver_prog = t.get_pserver_program(pserver_endpoint, optimize_ops)
exe.run(fluid.default_startup_program())
exe.run(pserver_prog)
else:
feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
exe.run(fluid.default_startup_program())
for pass_id in range(PASS_NUM):
...
:param optimize_ops: op list of optimization, should be the
return value of Optimizer.minimize
:type optimize_ops: list
:param program: program to optimize, default default_main_program
:param pservers: parameter server endpoints like "m1:6174,m2:6174"
:type pservers: string
:return: return a list of programs
"""
if program is None:
program = default_main_program()
self.trainers = trainers
self._optimize_distributed(
optimize_ops,
program,
params_grads,
pservers=pservers,
trainers=trainers,
split_method=split_method)
def _clone_param(self, block, v):
assert isinstance(v, Parameter)
new_p = Parameter(
block=block,
shape=v.shape,
dtype=v.dtype,
type=v.type,
lod_level=v.lod_level,
stop_gradient=v.stop_gradient,
trainable=v.trainable,
optimize_attr=v.optimize_attr,
regularizer=v.regularizer,
name=v.name)
block.vars[new_p.name] = new_p
def _clone_var(self, block, var):
assert isinstance(var, Variable)
return block.create_var(
name=var.name,
shape=var.shape,
dtype=var.dtype,
type=var.type,
lod_level=var.lod_level,
persistable=var.persistable)
def _optimize_distributed(self, optimize_ops, program, params_and_grads,
**kwargs):
if kwargs.has_key("split_method"):
split_method = kwargs["split_method"]
else:
split_method = round_robin
assert (callable(split_method))
pserver_endpoints = kwargs["pservers"].split(",")
self.param_grad_map = split_method(params_and_grads, pserver_endpoints)
send_op_ordered_inputs = []
epmap = []
for ep, v in self.param_grad_map.iteritems():
send_op_ordered_inputs.extend(v["grads"])
for i in v["grads"]:
epmap.append(ep)
send_op = program.global_block().append_op(
type="send",
inputs={"X": send_op_ordered_inputs
}, # inputs is a list of tensors to be send
outputs={},
attrs={"endpoints": pserver_endpoints,
"epmap": epmap})
def get_trainer_program(optimize_ops, program):
# remove optimize ops and add a send op to main_program
program.global_block().delete_ops(optimize_ops)
def _create_var_for_trainers(self, block, var, trainers):
var_list = []
for i in xrange(trainers):
var_each = block.create_var(
name="%s.trainer_%d" % (var.name, i),
psersistable=var.persistable,
dtype=var.dtype,
shape=var.shape)
var_list.append(var_each)
return var_list
def get_pserver_program(self, endpoint, optimize_ops):
pserver_program = Program()
for v in self.param_grad_map[endpoint]["params"]:
self._clone_param(pserver_program.global_block(), v)
optimize_sub_program = Program()
grad_var_names = [
var.name for var in self.param_grad_map[endpoint]["grads"]
]
for opt_op in optimize_ops:
for _, var in opt_op.inputs.iteritems():
# NOTE: append operators to merge gradients from multiple
# trainers. If trainers == 1, this is not needed.
if self.trainers > 1 and var.name in grad_var_names:
vars2merge = self._create_var_for_trainers(
optimize_sub_program.global_block(), var, self.trainers)
merged_var = optimize_sub_program.global_block().create_var(
name=var.name,
persistable=var.persistable,
dtype=var.dtype,
shape=var.shape)
optimize_sub_program.global_block().append_op(
type="sum",
inputs={"X": vars2merge},
outputs={"Out": merged_var})
optimize_sub_program.global_block().append_op(
type="scale",
inputs={"X": merged_var},
outputs={"Out": merged_var},
attrs={"scale": 1.0 / float(self.trainers)})
else:
optimize_sub_program.global_block().create_var(
name=var.name,
persistable=var.persistable,
dtype=var.dtype,
shape=var.shape)
if opt_op.inputs.has_key("Grad"):
if opt_op.inputs["Grad"].name in grad_var_names:
print "appending ", opt_op.type, opt_op.inputs
optimize_sub_program.global_block().append_op(
type=opt_op.type,
inputs=opt_op.inputs,
outputs=opt_op.outputs,
attrs=opt_op.attrs)
else:
optimize_sub_program.global_block().append_op(
type=opt_op.type,
inputs=opt_op.inputs,
outputs=opt_op.outputs,
attrs=opt_op.attrs)
pserver_program.global_block().append_op(
type="recv",
inputs={"RX":
self.param_grad_map[endpoint]["grads"]}, # grads to recv
outputs={},
attrs={
"OptimizeProgram": optimize_sub_program.desc,
"endpoint": endpoint,
"ParamList":
[p.name for p in self.param_grad_map[endpoint]["params"]],
"GradList":
[p.name for p in self.param_grad_map[endpoint]["grads"]],
"Trainers": self.trainers
})
pserver_program.sync_with_cpp()
return pserver_program
import numpy as np
from . import core
from framework import Program, default_main_program
from framework import Program, default_main_program, Parameter, Variable
__all__ = ['Executor', 'g_scope']
......@@ -148,7 +148,7 @@ class Executor(object):
outputs={'Out': [fetch_var]},
attrs={'col': i})
self.executor.run(program.desc, scope, 0, True)
self.executor.run(program.desc, scope, 0, True, True)
outs = [
core.get_fetch_variable(scope, fetch_var_name, i)
for i in xrange(len(fetch_list))
......
......@@ -359,6 +359,10 @@ class Operator(object):
"""
self.block = block
self.desc = desc
# for clone a new operator
self.inputs = inputs
self.outputs = outputs
self.attrs = attrs
if len(self.desc.type()) != 0:
return
if type is None:
......@@ -389,6 +393,9 @@ class Operator(object):
% (in_proto.name, len(in_args)))
in_arg_names = []
for arg in in_args:
if isinstance(arg, basestring):
in_arg_names.append(arg)
else:
in_arg_names.append(arg.name)
self.desc.set_input(in_proto.name, in_arg_names)
else:
......@@ -430,13 +437,18 @@ class Operator(object):
continue
if isinstance(attrs[attr_name], Block):
self.desc.set_block_attr(attr_name, attrs[attr_name].desc)
elif isinstance(attrs[attr_name], core.BlockDesc) or \
isinstance(attrs[attr_name], core.ProgramDesc):
self.desc.set_serialized_attr(
attr_name, attrs[attr_name].serialize_to_string())
else:
self.desc.set_attr(attr_name, attrs[attr_name])
self.desc.check_attrs()
no_kernel_op_set = {
'feed', 'fetch', 'save', 'load', 'recurrent',
'rnn_memory_helper_grad', 'conditional_block', 'while', 'get_places'
'rnn_memory_helper_grad', 'conditional_block', 'while', 'send',
'recv', 'get_places'
}
if type not in no_kernel_op_set:
self.desc.infer_var_type(self.block.desc)
......@@ -582,6 +594,7 @@ class Block(object):
self.vars = dict() # var_name --> var
self.ops = collections.deque() # operator list
self.program = program
self.removed_vars = dict()
def __str__(self):
return self.to_string(True)
......@@ -638,6 +651,16 @@ class Block(object):
self.ops.append(op)
return op
def delete_ops(self, ops):
# remove from cpp
# FIXME(typhoonzero): remove only the first occuracy.
try:
start = list(self.ops).index(ops[0])
end = list(self.ops).index(ops[-1])
except Exception, e:
raise e
self.desc.remove_op(start, end)
def prepend_op(self, *args, **kwargs):
op_desc = self.desc.prepend_op()
op = Operator(self, op_desc, *args, **kwargs)
......
......@@ -194,3 +194,9 @@ class LayerHelper(object):
else:
# For integer and boolean types, initialize with all zeros
return Constant()
def is_instance(self, param_name, cls):
param = self.kwargs.get(param_name, None)
if not isinstance(param, cls):
raise TypeError("The input {0} parameter of method {1} must be {2}",
param_name, self.layer_type, cls.__name__)
......@@ -3,6 +3,7 @@ from ..framework import Program, Variable, Operator
from .. import core
from tensor import assign, fill_constant
import contextlib
from ..registry import autodoc
__all__ = [
'split_lod_tensor', 'merge_lod_tensor', 'BlockGuard', 'StaticRNNGuard',
......@@ -10,7 +11,7 @@ __all__ = [
'max_sequence_len', 'topk', 'lod_tensor_to_array', 'array_to_lod_tensor',
'increment', 'array_write', 'create_array', 'less_than', 'array_read',
'shrink_memory', 'array_length', 'IfElse', 'DynamicRNN', 'ConditionalBlock',
'StaticRNN'
'StaticRNN', 'reorder_lod_tensor_by_rank'
]
......@@ -440,9 +441,25 @@ def topk(input, k):
def lod_tensor_to_array(x, table):
"""
This function creates an operator to convert an LOD_Tensor to
"""This function performs the operation that converts an LOD_Tensor to
an array.
Args:
x (Variable|list): The tensor that needs to be converted to an array.
table (ParamAttr|list): The variable that stores the level of lod
which is ordered by sequence length in
descending order.
Returns:
Variable: The variable of type array that has been converted from a
tensor.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[10])
table = fluid.layers.lod_rank_table(x, level=0)
array = fluid.layers.lod_tensor_to_array(x, table)
"""
helper = LayerHelper("lod_tensor_to_array", **locals())
array = helper.create_variable(
......@@ -458,9 +475,26 @@ def lod_tensor_to_array(x, table):
def array_to_lod_tensor(x, table):
"""
This function creates an operator to convert an array to a
LOD_Tensor.
"""This function performs the operations that converts an array to
an LOD_Tensor.
Args:
x (Variable|list): The array that needs to be converted to a tensor.
table (ParamAttr|list): The variable that stores the level of lod
which is ordered by sequence length in
descending order.
Returns:
Variable: The variable of type tensor that has been converted
from an array.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[10])
table = fluid.layers.lod_rank_table(x, level=0)
array = fluid.layers.lod_tensor_to_array(x, table)
lod_tensor = fluid.layers.array_to_lod_tensor(array, table)
"""
helper = LayerHelper("array_to_lod_tensor", **locals())
tmp = helper.create_tmp_variable(dtype=x.dtype)
......@@ -473,10 +507,24 @@ def array_to_lod_tensor(x, table):
def increment(x, value=1.0, in_place=True):
"""
This function creates an operator to increment each value in the input
`x` by an amount: `value` as mentioned in the input parameter. This
operation is performed in-place by default.
"""This function performs an operation that increments each value in the
input :math:`x` by an amount: :math:`value` as mentioned in the input
parameter. This operation is performed in-place by default.
Args:
x (Variable|list): The tensor that has the input values.
value (float): The amount by which the values should be incremented.
in_place (bool): If the increment should be performed in-place.
Returns:
Variable: The tensor variable storing the transformation of
element-wise increment of each value in the input.
Examples:
.. code-block:: python
data = fluid.layers.data(name='data', shape=[32, 32], dtype='float32')
data = fluid.layers.increment(x=data, value=3.0, in_place=True)
"""
helper = LayerHelper("increment", **locals())
if not in_place:
......@@ -492,9 +540,24 @@ def increment(x, value=1.0, in_place=True):
def array_write(x, i, array=None):
"""
This function creates an operator to write the data out as a
"""This function performs the operation to write the data out as an
LOD_TENSOR_ARRAY.
Args:
x (Variable|list): The input tensor from which the data will be read.
i (Variable|list): The subscript index in tensor array, that points the
place from which data will be read.
array (Variable|list): The data can be read into this variable if
this is assigned.
Returns:
Variable: The tensor type variable that has the data written to it.
Examples:
.. code-block::python
tmp = fluid.layers.zeros(shape=[10], dtype='int32')
i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
arr = layers.array_write(tmp, i=i)
"""
helper = LayerHelper('array_write', **locals())
if array is None:
......@@ -511,6 +574,21 @@ def array_write(x, i, array=None):
def create_array(dtype):
"""This function creates an array of type :math:`LOD_TENSOR_ARRAY` using the
LayerHelper.
Args:
dtype (int|float): The data type of the elements in the array.
Returns:
Variable: The tensor variable storing the elements of data type.
Examples:
.. code-block:: python
data = fluid.layers.create_array(dtype='float32')
"""
helper = LayerHelper("array", **locals())
return helper.create_variable(
name="{0}.out".format(helper.name),
......@@ -519,6 +597,24 @@ def create_array(dtype):
def less_than(x, y, cond=None, **ignored):
"""
**Less than**
This layer returns the truth value of :math:`x < y` elementwise.
Args:
x(Variable): First operand of *less_than*
y(Variable): Second operand of *less_than*
cond(Variable|None): Optional output variable to store the result of *less_than*
Returns:
Variable: The tensor variable storing the output of *less_than*.
Examples:
.. code-block:: python
less = fluid.layers.less_than(x=label, y=limit)
"""
helper = LayerHelper("less_than", **locals())
if cond is None:
cond = helper.create_tmp_variable(dtype='bool')
......@@ -531,9 +627,19 @@ def less_than(x, y, cond=None, **ignored):
def array_read(array, i):
"""
This function creates an operator to read the data in as a
"""This function performs the operation to read the data in as an
LOD_TENSOR_ARRAY.
Args:
array (Variable|list): The input tensor that will be written to an array.
i (Variable|list): The subscript index in tensor array, that points the
place where data will be written to.
Returns:
Variable: The tensor type variable that has the data written to it.
Examples:
.. code-block::python
tmp = fluid.layers.zeros(shape=[10], dtype='int32')
i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
arr = layers.array_read(tmp, i=i)
"""
helper = LayerHelper('array_read', **locals())
if not isinstance(
......@@ -567,9 +673,23 @@ def shrink_memory(x, i, table):
def array_length(array):
"""
This function creates an operator to find the length of the
"""This function performs the operation to find the length of the input
LOD_TENSOR_ARRAY.
Args:
array (LOD_TENSOR_ARRAY): The input array that will be used
to compute the length.
Returns:
Variable: The length of the input LoDTensorArray.
Examples:
.. code-block::python
tmp = fluid.layers.zeros(shape=[10], dtype='int32')
i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
arr = fluid.layers.array_write(tmp, i=i)
arr_len = fluid.layers.array_length(arr)
"""
helper = LayerHelper('array_length', **locals())
tmp = helper.create_tmp_variable(dtype='int64')
......@@ -963,3 +1083,18 @@ class DynamicRNN(object):
if self.status != DynamicRNN.IN_RNN:
raise ValueError("{0} can only be invoked inside rnn block.".format(
method))
@autodoc
def reorder_lod_tensor_by_rank(x, rank_table):
helper = LayerHelper('reorder_lod_tensor_by_rank', **locals())
helper.is_instance('x', Variable)
helper.is_instance('rank_table', Variable)
out = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(
type='reorder_lod_tensor_by_rank',
inputs={'X': [x],
'RankTable': [rank_table]},
outputs={'Out': [out]})
return out
......@@ -12,20 +12,9 @@ def data(name,
type=core.VarDesc.VarType.LOD_TENSOR,
stop_gradient=True):
"""
Data Layer.
**Data Layer**
Args:
name: The name/alias of the function
shape: Tuple declaring the shape.
append_batch_size: Whether or not to append the data as a batch.
dtype: The type of data : float32, float_16, int etc
type: The output type. By default it is LOD_TENSOR.
lod_level(int): The LoD Level. 0 means the input data is not a sequence.
main_program: Name of the main program that calls this
startup_program: Name of the startup program
stop_gradient: A boolean that mentions whether gradient should flow.
This function takes in input and based on whether data has
This function takes in the input and based on whether data has
to be returned back as a minibatch, it creates the global variable using
the helper functions. The global variables can be accessed by all the
following operations and layers in the graph.
......@@ -33,6 +22,24 @@ def data(name,
All the input variables of this function are passed in as local variables
to the LayerHelper constructor.
Args:
name(str): The name/alias of the function
shape(list): Tuple declaring the shape.
append_batch_size(bool): Whether or not to append the data as a batch.
dtype(int|float): The type of data : float32, float_16, int etc
type(VarType): The output type. By default it is LOD_TENSOR.
lod_level(int): The LoD Level. 0 means the input data is not a sequence.
main_program(Program): Name of the main program that calls this
startup_program(Program): Name of the startup program
stop_gradient(bool): A boolean that mentions whether gradient should flow.
Returns:
Variable: The global variable that gives access to the data.
Examples:
.. code-block:: python
data = fluid.layers.data(name='x', shape=[784], dtype='float32')
"""
helper = LayerHelper('data', **locals())
shape = list(shape)
......
......@@ -13,7 +13,8 @@ __all__ = [
'crf_decoding', 'cos_sim', 'cross_entropy', 'square_error_cost', 'accuracy',
'chunk_eval', 'sequence_conv', 'conv2d', 'sequence_pool', 'pool2d',
'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand',
'lstm_unit', 'reduce_sum'
'lstm_unit', 'reduce_sum', 'reduce_mean', 'sequence_first_step',
'sequence_last_step'
]
......@@ -25,34 +26,83 @@ def fc(input,
act=None,
name=None):
"""
Fully Connected Layer.
**Fully Connected Layer**
The fully connected layer can take multiple tensors as its inputs. It
creates a variable (one for each input tensor) called weights for each input
tensor, which represents a fully connected weight matrix from each input
unit to each output unit. The fully connected layer multiplies each input
tensor with its coresponding weight to produce an output Tensor. If
multiple input tensors are given, the results of multiple multiplications
will be sumed up. If bias_attr is not None, a biases variable will be
created and added to the output. Finally, if activation is not None,
it will be applied to the output as well.
This process can be formulated as follows:
.. math::
Out = Act({\sum_{i=0}^{N-1}W_iX_i + b})
In the above equation:
* :math:`N`: Number of the input.
* :math:`X_i`: The input tensor.
* :math:`W`: The weights created by this layer.
* :math:`b`: The bias parameter created by this layer (if needed).
* :math:`Act`: The activation funtion.
* :math:`Out`: The output tensor.
Args:
input: The input tensor to the function
size: The size of the layer
num_flatten_dims: Number of columns in input
param_attr: The parameters/weights to the FC Layer
param_initializer: Initializer used for the weight/parameter. If None, XavierInitializer() is used
bias_attr: The bias parameter for the FC layer
bias_initializer: Initializer used for the bias. If None, then ConstantInitializer() is used
act: Activation to be applied to the output of FC layer
name: Name/alias of the function
main_program: Name of the main program that calls this
startup_program: Name of the startup program
input(Variable|list): The input tensor(s) to the fully connected layer.
size(int): The number of output units in the fully connected layer.
num_flatten_dims(int): The fc layer can accept an input tensor with more
than two dimensions. If this happens, the
multidimensional tensor will first be flattened
into a 2-dimensional matrix. The parameter
`num_flatten_dims` determines how the input tensor
is flattened: the first `num_flatten_dims`
dimensions will be flatten to form the first
dimension of the final matrix (height of the
matrix), and the rest `rank(X) - num_col_dims`
dimensions are flattened to form the second
dimension of the final matrix (width of the matrix).
For example, suppose `X` is a 6-dimensional tensor
with a shape [2, 3, 4, 5, 6], and
`x_num_col_dims` = 3. Then, the flattened matrix
will have a shape [2 x 3 x 4, 5 x 6] = [24, 30].
By default, `x_num_col_dims` is set to 1.
param_attr(ParamAttr|list): The parameter attribute for learnable
parameters/weights of the fully connected
layer.
param_initializer(ParamAttr|list): The initializer used for the
weight/parameter. If set None,
XavierInitializer() will be used.
bias_attr(ParamAttr|list): The parameter attribute for the bias parameter
for this layer. If set None, no bias will be
added to the output units.
bias_initializer(ParamAttr|list): The initializer used for the bias.
If set None, then ConstantInitializer()
will be used.
act(str): Activation to be applied to the output of the fully connected
layer.
name(str): Name/alias of the fully connected layer.
This function can take in multiple inputs and performs the Fully Connected
function (linear transformation) on top of each of them.
So for input x, the output will be : Wx + b. Where W is the parameter,
b the bias and x is the input.
The function also applies an activation (non-linearity) on top of the
output, if activation is passed in the input.
Returns:
Variable: The output tensor variable.
All the input variables of this function are passed in as local variables
to the LayerHelper constructor.
Raises:
ValueError: If rank of the input tensor is less than 2.
Examples:
.. code-block:: python
data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
fc = fluid.layers.fc(input=data, size=1000, act="tanh")
"""
helper = LayerHelper('fc', **locals())
helper = LayerHelper("fc", **locals())
dtype = helper.input_dtype()
......@@ -72,8 +122,8 @@ def fc(input,
"Y": w,
},
outputs={"Out": tmp},
attrs={'x_num_col_dims': num_flatten_dims,
'y_num_col_dims': 1})
attrs={"x_num_col_dims": num_flatten_dims,
"y_num_col_dims": 1})
mul_results.append(tmp)
# sum
......@@ -91,25 +141,30 @@ def fc(input,
def embedding(input, size, is_sparse=False, param_attr=None, dtype='float32'):
"""
Embedding Layer.
**Embedding Layer**
This layer is used to lookup a vector of IDs, provided by *input*, in a lookup table.
The result of this lookup is the embedding of each ID in the *input*.
All the input variables are passed in as local variables to the LayerHelper
constructor.
Args:
param_initializer:
input: The input to the function
size: The size of the layer
is_sparse: A flag that decleares whether the input is sparse
param_attr: Parameters for this layer
dtype: The type of data : float32, float_16, int etc
main_program: Name of the main program that calls this
startup_program: Name of the startup program
input(Variable): Input to the function
size(tuple|list|None): Shape of the look up table parameter
is_sparse(bool): Boolean flag that specifying whether the input is sparse
param_attr(ParamAttr): Parameters for this layer
dtype(np.dtype|core.DataType|str): The type of data : float32, float_16, int etc
This function can take in the input (which is a vector of IDs) and
performs a lookup in the lookup_table using these IDs, to result into
the embedding of each ID in the input.
Returns:
Variable: The tensor variable storing the embeddings of the \
supplied inputs.
All the input variables of this function are passed in as local variables
to the LayerHelper constructor.
Examples:
.. code-block:: python
data = fluid.layers.data(name='ids', shape=[32, 32], dtype='float32')
fc = fluid.layers.embedding(input=data, size=16)
"""
helper = LayerHelper('embedding', **locals())
......@@ -521,8 +576,52 @@ def conv2d(input,
def sequence_pool(input, pool_type, **kwargs):
"""
This function add the operator for sequence pooling.
This is applied on top of the input using pool_type mentioned
in the parameters.
It pools features of all time-steps of each instance, and is applied
on top of the input using pool_type mentioned in the parameters.
It supports four pool_type:
- average: :math:`Out[i] = \\frac{\sum_i X_i}{N}`
- sum: :math:`Out[i] = \sum_jX_{ij}`
- sqrt: :math:`Out[i] = \\frac{\sum_jX_{ij}}{\sqrt{len(X_i)}}`
- max: :math:`Out[i] = max(X_i)`
.. code-block:: text
x is a 1-level LoDTensor:
x.lod = [[0, 2, 5, 7]]
x.data = [1, 3, 2, 4, 6, 5, 1]
x.dims = [7, 1]
then output is a Tensor:
out.dim = [3, 1]
with condition len(x.lod[-1]) - 1 == out.dims[0]
for different pool_type:
average: out.data = [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2
sum : out.data = [4, 12, 6], where 4=1+3, 12=2+4+6, 6=5+1
sqrt : out.data = [2.82, 6.93, 4.24], where 2.82=(1+3)/sqrt(2),
6.93=(2+4+6)/sqrt(3), 4.24=(5+1)/sqrt(2)
max : out.data = [3, 6, 5], where 3=max(1,3), 6=max(2,4,6), 5=max(5,1)
Args:
input(variable): The input variable which is a LoDTensor.
pool_type (string): The pooling type of sequence_pool.
It supports average, sum, sqrt and max.
Returns:
The sequence pooling variable which is a Tensor.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[7, 1],
dtype='float32', lod_level=1)
avg_x = fluid.layers.sequence_pool(input=x, pool_type='average')
sum_x = fluid.layers.sequence_pool(input=x, pool_type='sum')
sqrt_x = fluid.layers.sequence_pool(input=x, pool_type='sqrt')
max_x = fluid.layers.sequence_pool(input=x, pool_type='max')
"""
helper = LayerHelper('sequence_pool', input=input, **kwargs)
dtype = helper.input_dtype()
......@@ -539,6 +638,72 @@ def sequence_pool(input, pool_type, **kwargs):
return pool_out
def sequence_first_step(input, **kwargs):
"""
This funciton get the first step of sequence.
.. code-block:: text
x is a 1-level LoDTensor:
x.lod = [[0, 2, 5, 7]]
x.data = [1, 3, 2, 4, 6, 5, 1]
x.dims = [7, 1]
then output is a Tensor:
out.dim = [3, 1]
with condition len(x.lod[-1]) - 1 == out.dims[0]
out.data = [1, 2, 5], where 1=first(1,3), 2=first(2,4,6), 5=first(5,1)
Args:
input(variable): The input variable which is a LoDTensor.
Returns:
The sequence's first step variable which is a Tensor.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[7, 1],
dtype='float32', lod_level=1)
x_first_step = fluid.layers.sequence_first_step(input=x)
"""
return sequence_pool(input=input, pool_type="first")
def sequence_last_step(input, **kwargs):
"""
This funciton get the last step of sequence.
.. code-block:: text
x is a 1-level LoDTensor:
x.lod = [[0, 2, 5, 7]]
x.data = [1, 3, 2, 4, 6, 5, 1]
x.dims = [7, 1]
then output is a Tensor:
out.dim = [3, 1]
with condition len(x.lod[-1]) - 1 == out.dims[0]
out.data = [3, 6, 1], where 3=last(1,3), 6=last(2,4,6), 1=last(5,1)
Args:
input(variable): The input variable which is a LoDTensor.
Returns:
The sequence's last step variable which is a Tensor.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[7, 1],
dtype='float32', lod_level=1)
x_last_step = fluid.layers.sequence_last_step(input=x)
"""
return sequence_pool(input=input, pool_type="last")
def pool2d(input,
pool_size,
pool_type,
......@@ -683,6 +848,7 @@ def conv2d_transpose(input,
filter_size=None,
padding=None,
stride=None,
dilation=None,
param_attr=None):
"""
The transpose of conv2d layer.
......@@ -706,6 +872,9 @@ def conv2d_transpose(input,
stride(int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride.
dilation(int|tuple): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation.
param_attr: Parameter Attribute.
main_program(Program): the main program
startup_program(Program): the startup program
......@@ -726,10 +895,15 @@ def conv2d_transpose(input,
op_attr['paddings'] = padding
if isinstance(stride, int):
op_attr['strides'] = stride
op_attr['strides'] = [stride, stride]
elif stride is not None:
op_attr['strides'] = stride
if isinstance(dilation, int):
op_attr['dilations'] = [dilation, dilation]
elif dilation is not None:
op_attr['dilations'] = dilation
if filter_size is None:
if output_size is None:
raise ValueError("output_size must be set when filter_size is None")
......@@ -738,14 +912,17 @@ def conv2d_transpose(input,
padding = op_attr.get('paddings', [0, 0])
stride = op_attr.get('strides', [1, 1])
dilation = op_attr.get('dilations', [1, 1])
h_in = input.shape[2]
w_in = input.shape[3]
filter_size_h = output_size[0] - \
(h_in - 1) * stride[0] + 2 * padding[0]
filter_size_w = output_size[1] - \
(w_in - 1) * stride[1] + 2 * padding[1]
filter_size_h = (output_size[0] - (h_in - 1) * stride[0] + 2 *
padding[0] - 1) / dilation[0] + 1
filter_size_w = (output_size[1] - (w_in - 1) * stride[1] + 2 *
padding[1] - 1) / dilation[1] + 1
filter_size = [filter_size_h, filter_size_w]
elif isinstance(filter_size, int):
filter_size = [filter_size, filter_size]
......@@ -979,3 +1156,47 @@ def reduce_sum(input, dim=None, keep_dim=False):
'reduce_all': True if dim == None else False
})
return out
def reduce_mean(input, dim=None, keep_dim=False):
"""
Computes the mean of tensor elements over the given dimension.
Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
dim (int|None): The dimension along which the mean is computed. If
:attr:`None`, compute the mean over all elements of :attr:`input`
and return a Tensor variable with a single element, otherwise
must be in the range :math:`[-rank(input), rank(input))`. If
:math:`dim < 0`, the dimension to reduce is :math:`rank + dim`.
keep_dim (bool): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true.
Returns:
Variable: The reduced Tensor variable.
Examples:
.. code-block:: python
# x is a Tensor variable with following elements:
# [[0.2, 0.3, 0.5, 0.9]
# [0.1, 0.2, 0.6, 0.7]]
# Each example is followed by the correspending output tensor.
fluid.layers.reduce_mean(x) # [0.4375]
fluid.layers.reduce_mean(x, dim=0) # [0.15, 0.25, 0.55, 0.8]
fluid.layers.reduce_mean(x, dim=-1) # [0.475, 0.4]
fluid.layers.reduce_mean(x, dim=1, keep_dim=True) # [[0.475], [0.4]]
"""
helper = LayerHelper('reduce_mean', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype())
helper.append_op(
type='reduce_mean',
inputs={'X': input},
outputs={'Out': out},
attrs={
'dim': dim if dim != None else 0,
'keep_dim': keep_dim,
'reduce_all': True if dim == None else False
})
return out
......@@ -27,10 +27,23 @@ def cast(x, dtype):
return out
def concat(input, axis):
def concat(input, axis=0):
"""
This function concats the input along the axis mentioned
**Concat**
This function concatenates the input along the axis mentioned
and returns that as the output.
Args:
input(list): List of tensors to be concatenated
axis(int): Integer axis along which the tensors will be concatenated
Returns:
Variable: Output variable of the concatenation
Examples:
.. code-block:: python
out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth])
"""
helper = LayerHelper('concat', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype())
......@@ -43,9 +56,28 @@ def concat(input, axis):
def sums(input, out=None):
"""
This function takes in the input and performs the sum operation on it
and returns that as the output.
"""This function performs the sum operation on the input and returns the
result as the output.
Args:
input (Variable|list): The input tensor that has the elements
that need to be summed up.
Returns:
Variable: The tensor type variable that has the sum of input
written to it.
Examples:
.. code-block::python
tmp = fluid.layers.zeros(shape=[10], dtype='int32')
i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
a0 = layers.array_read(array=tmp, i=i)
i = layers.increment(x=i)
a1 = layers.array_read(array=tmp, i=i)
mean_a0 = layers.mean(x=a0)
mean_a1 = layers.mean(x=a1)
a_sum = layers.sums(input=[mean_a0, mean_a1])
"""
helper = LayerHelper('sum', **locals())
if out is None:
......@@ -55,6 +87,24 @@ def sums(input, out=None):
def assign(input, output):
"""
**Assign**
This function copies the *input* Variable to the *output* Variable.
Args:
input(Variable): The source variable
output(Variable): The destination variable
Returns:
Variable: The destination variable that was supplied as the *output*.
Examples:
.. code-block:: python
out = fluid.layers.create_tensor(dtype='float32')
hidden = fluid.layers.fc(input=data, size=10)
fluid.layers.assign(hidden, out)
"""
helper = LayerHelper('assign', **locals())
helper.append_op(
type='scale',
......@@ -66,9 +116,26 @@ def assign(input, output):
def fill_constant(shape, dtype, value, out=None):
"""
This function creates a tensor , with shape as mentioned in the input and
specified dtype and fills this up with a constant value that
comes in the input. It also sets the stop_gradient to be True.
**fill_constant**
This function creates a tensor of specified *shape* and
*dtype*, and initializes this with a constant supplied in *value*.
It also sets *stop_gradient* to True.
Args:
shape(tuple|list|None): Shape of output tensor
dtype(np.dtype|core.DataType|str): Data type of output tensor
value(float): Constant value to initialize the output tensor
out(Variable): Output Variable to initialize
Returns:
Variable: The tensor variable storing the output
Examples:
.. code-block:: python
data = fluid.layers.fill_constant(shape=[1], value=0, dtype='int64')
"""
helper = LayerHelper("fill_constant", **locals())
if out is None:
......@@ -90,6 +157,31 @@ def fill_constant_batch_size_like(input,
value,
input_dim_idx=0,
output_dim_idx=0):
"""
**fill_constant_batch_size_like**
This function creates a tensor of specified *shape*, *dtype* and batch size,
and initializes this with a constant supplied in *value*. The batch size is
obtained from the `input` tensor.
It also sets *stop_gradient* to True.
Args:
input(Variable): Tensor whose dimensions will be used to get batch size
shape(tuple|list|None): Shape of output tensor
dtype(np.dtype|core.DataType|str): Data type of output tensor
value(float): Constant value to initialize the output tensor
input_dim_idx(int): Index of input's batch size dimension
output_dim_idx(int): Index of output's batch size dimension
Returns:
Variable: The tensor variable storing the output
Examples:
.. code-block:: python
data = fluid.layers.fill_constant(shape=[1], value=0, dtype='int64')
"""
helper = LayerHelper("fill_constant_batch_size_like", **locals())
out = helper.create_tmp_variable(dtype=dtype)
helper.append_op(
......
......@@ -207,7 +207,7 @@ class Optimizer(object):
optimize_ops = self.create_optimization_pass(params_grads, loss,
startup_program)
return optimize_ops
return optimize_ops, params_grads
class SGDOptimizer(Optimizer):
......
......@@ -58,7 +58,9 @@ class ParamAttr(object):
def to_kwargs(self, with_initializer=False):
kwargs = {
'name': self.name,
'learning_rate': self.learning_rate,
'optimize_attr': {
'learning_rate': self.learning_rate
},
'regularizer': self.regularizer,
'trainable': self.trainable,
'clip_attr': self.clip
......
......@@ -8,7 +8,7 @@ import proto.framework_pb2 as framework_pb2
from framework import OpProtoHolder, Variable, Program, Operator
from paddle.v2.fluid.layer_helper import LayerHelper, unique_name
__all__ = ['deprecated', 'register_layer']
__all__ = ['deprecated', 'register_layer', 'autodoc']
def _convert_(name):
......@@ -175,12 +175,18 @@ def deprecated(func_or_class):
"""
Wrap func with deprecated warning
"""
warnings.simplefilter('always', DeprecationWarning) #turn off filter
warnings.simplefilter('always', DeprecationWarning) # turn off filter
warnings.warn(
"Call to deprecated function {}.".format(func.__name__),
category=DeprecationWarning,
stacklevel=2)
warnings.simplefilter('default', DeprecationWarning) #reset filter
warnings.simplefilter('default', DeprecationWarning) # reset filter
return func(*args, **kwargs)
return func_wrapper
def autodoc(func):
func.__doc__ = _generate_doc_string_(OpProtoHolder.instance().get_op_proto(
func.__name__))
return func
from __future__ import print_function
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import os
images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=images,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu")
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
act="relu")
predict = fluid.layers.fc(input=conv_pool_2, size=10, act="softmax")
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
optimizer = fluid.optimizer.Adam(learning_rate=0.01)
optimize_ops, params_grads = optimizer.minimize(avg_cost)
accuracy = fluid.evaluator.Accuracy(input=predict, label=label)
BATCH_SIZE = 50
PASS_NUM = 3
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=500),
batch_size=BATCH_SIZE)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
t = fluid.DistributeTranspiler()
pserver_endpoints = os.getenv("PSERVERS")
training_role = os.getenv("TRAINING_ROLE",
"TRAINER") # get the training role: trainer/pserver
t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=1)
if training_role == "PSERVER":
pserver_prog = t.get_pserver_program(pserver_endpoints, optimize_ops)
exe.run(fluid.default_startup_program())
exe.run(pserver_prog)
elif training_role == "TRAINER":
feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
exe.run(fluid.default_startup_program())
for pass_id in range(PASS_NUM):
accuracy.reset(exe)
for data in train_reader():
loss, acc = exe.run(fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[avg_cost] + accuracy.metrics)
pass_acc = accuracy.eval(exe)
# print loss, acc
if loss < 10.0 and pass_acc > 0.9:
# if avg cost less than 10.0 and accuracy is larger than 0.9, we think our code is good.
exit(0)
pass_acc = accuracy.eval(exe)
print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc))
else:
print("environment var TRAINER_ROLE should be TRAINER os PSERVER")
exit(1)
......@@ -33,7 +33,7 @@ def encoder_decoder():
fc1 = fluid.layers.fc(input=src_embedding, size=hidden_dim * 4, act='tanh')
lstm_hidden0, lstm_0 = layers.dynamic_lstm(input=fc1, size=hidden_dim * 4)
encoder_out = layers.sequence_pool(input=lstm_hidden0, pool_type="last")
encoder_out = layers.sequence_last_step(input=lstm_hidden0)
# decoder
trg_language_word = layers.data(
......
......@@ -208,7 +208,7 @@ class TestBatchNormOp(OpTest):
print 'python: NHWC, NCHW, backward checking passed'
def test_forward_backward(self):
def test_with_place(place, tensor_format, shape):
def test_with_place(place, data_layout, shape):
# attr
epsilon = 0.00001
momentum = 0.9
......@@ -292,7 +292,7 @@ class TestBatchNormOp(OpTest):
SavedVariance="saved_variance",
# attrs
is_test=False,
tensor_format=tensor_format,
data_layout=data_layout,
momentum=momentum,
epsilon=epsilon)
......@@ -311,7 +311,7 @@ class TestBatchNormOp(OpTest):
atol = 1e-4
self.__assert_close(variance_out_tensor, variance_out,
"variance_out", atol)
print "op test forward passed: ", str(place), tensor_format
print "op test forward passed: ", str(place), data_layout
# run backward
batch_norm_op_grad = get_backward_op(scope, batch_norm_op, set())
......@@ -336,11 +336,15 @@ class TestBatchNormOp(OpTest):
self.__assert_close(x_grad_tensor, x_grad_ref, "x_grad")
self.__assert_close(scale_grad_tensor, scale_grad_ref, "scale_grad")
self.__assert_close(bias_grad_tensor, bias_grad_ref, "bias_grad")
print "op test backward passed: ", str(place), tensor_format
print "op test backward passed: ", str(place), data_layout
places = [core.CPUPlace()]
if core.is_compile_gpu() and core.op_support_gpu("batch_norm"):
places.append(core.GPUPlace(0))
core.init_devices(["CPU", "GPU:0"])
else:
core.init_devices(["CPU"])
for place in places:
for data_format in ["NCHW", "NHWC"]:
test_with_place(place, data_format, [2, 3, 4, 5])
......
......@@ -3,14 +3,17 @@ import numpy as np
from op_test import OpTest
def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param):
def conv2dtranspose_forward_naive(input_, filter_, attrs):
in_n, in_c, in_h, in_w = input_.shape
f_c, out_c, f_h, f_w = filter_.shape
assert in_c == f_c
stride, pad = conv2dtranspose_param['stride'], conv2dtranspose_param['pad']
out_h = (in_h - 1) * stride[0] + f_h
out_w = (in_w - 1) * stride[1] + f_w
stride, pad, dilations = attrs['strides'], attrs['paddings'], attrs[
'dilations']
d_bolck_h = dilations[0] * (f_h - 1) + 1
d_bolck_w = dilations[1] * (f_w - 1) + 1
out_h = (in_h - 1) * stride[0] + d_bolck_h
out_w = (in_w - 1) * stride[1] + d_bolck_w
out = np.zeros((in_n, out_c, out_h, out_w))
......@@ -23,9 +26,9 @@ def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param):
for k in range(out_c):
tmp_out = np.sum(input_masked * filter_[:, k, :, :], axis=0)
i1, i2 = i * stride[0], i * stride[0] + f_h
j1, j2 = j * stride[0], j * stride[0] + f_w
out[n, k, i1:i2, j1:j2] += tmp_out
i1, i2 = i * stride[0], i * stride[0] + d_bolck_h
j1, j2 = j * stride[0], j * stride[0] + d_bolck_h
out[n, k, i1:i2:dilations[0], j1:j2:dilations[1]] += tmp_out
out = out[:, :, pad[0]:out_h - pad[0], pad[1]:out_w - pad[1]]
return out
......@@ -37,11 +40,8 @@ class TestConv2dTransposeOp(OpTest):
self.init_op_type()
self.init_test_case()
conv2dtranspose_param = {'stride': self.stride, 'pad': self.pad}
input_ = np.random.random(self.input_size).astype("float32")
filter_ = np.random.random(self.filter_size).astype("float32")
output = conv2dtranspose_forward_naive(
input_, filter_, conv2dtranspose_param).astype('float32')
self.inputs = {'Input': input_, 'Filter': filter_}
self.attrs = {
......@@ -49,6 +49,10 @@ class TestConv2dTransposeOp(OpTest):
'paddings': self.pad,
'dilations': self.dilations
}
output = conv2dtranspose_forward_naive(input_, filter_,
self.attrs).astype('float32')
self.outputs = {'Output': output}
def test_check_output(self):
......@@ -104,11 +108,60 @@ class TestWithStride(TestConv2dTransposeOp):
self.filter_size = [f_c, 6, 3, 3]
class TestWithDilation(TestConv2dTransposeOp):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [1, 1]
self.dilations = [2, 2]
self.input_size = [2, 3, 5, 5] # NCHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3]
# ------------ test_cudnn ------------
class TestCudnn(TestConv2dTransposeOp):
def init_op_type(self):
self.op_type = "conv2d_transpose_cudnn"
class TestCudnnWithPad(TestWithPad):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [1, 1]
self.dilations = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3]
def init_op_type(self):
self.op_type = "conv2d_transpose_cudnn"
class TestCudnnWithStride(TestWithStride):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [2, 2]
self.dilations = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3]
def init_op_type(self):
self.op_type = "conv2d_transpose_cudnn"
# #cudnn v5 does not support dilation conv.
# class TestCudnnWithDilation(TestWithDilation):
# def init_test_case(self):
# self.pad = [1, 1]
# self.stride = [2, 2]
# self.dilations = [2, 2]
# self.input_size = [2, 3, 5, 5] # NCHW
# f_c = self.input_size[1]
# self.filter_size = [f_c, 6, 3, 3]
#
# def init_op_type(self):
# self.op_type = "conv2d_transpose_cudnn"
if __name__ == '__main__':
unittest.main()
......@@ -3,15 +3,20 @@ import numpy as np
from op_test import OpTest
def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param):
def conv3dtranspose_forward_naive(input_, filter_, attrs):
in_n, in_c, in_d, in_h, in_w = input_.shape
f_c, out_c, f_d, f_h, f_w = filter_.shape
assert in_c == f_c
stride, pad = conv3dtranspose_param['stride'], conv3dtranspose_param['pad']
out_d = (in_d - 1) * stride[0] + f_d
out_h = (in_h - 1) * stride[1] + f_h
out_w = (in_w - 1) * stride[2] + f_w
stride, pad, dilations = attrs['strides'], attrs['paddings'], attrs[
'dilations']
d_bolck_d = dilations[0] * (f_d - 1) + 1
d_bolck_h = dilations[1] * (f_h - 1) + 1
d_bolck_w = dilations[2] * (f_w - 1) + 1
out_d = (in_d - 1) * stride[0] + d_bolck_d
out_h = (in_h - 1) * stride[1] + d_bolck_h
out_w = (in_w - 1) * stride[2] + d_bolck_w
out = np.zeros((in_n, out_c, out_d, out_h, out_w))
for n in range(in_n):
......@@ -25,10 +30,11 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param):
for k in range(out_c):
tmp_out = np.sum(input_masked * filter_[:, k, :, :, :],
axis=0)
d1, d2 = d * stride[0], d * stride[0] + f_d
i1, i2 = i * stride[1], i * stride[1] + f_h
j1, j2 = j * stride[2], j * stride[2] + f_w
out[n, k, d1:d2, i1:i2, j1:j2] += tmp_out
d1, d2 = d * stride[0], d * stride[0] + d_bolck_d
i1, i2 = i * stride[1], i * stride[1] + d_bolck_h
j1, j2 = j * stride[2], j * stride[2] + d_bolck_w
out[n, k, d1:d2:dilations[0], i1:i2:dilations[1], j1:j2:
dilations[2]] += tmp_out
out = out[:, :, pad[0]:out_d - pad[0], pad[1]:out_h - pad[1], pad[2]:out_w -
pad[2]]
......@@ -41,18 +47,19 @@ class TestConv3dTransposeOp(OpTest):
self.init_op_type()
self.init_test_case()
conv3dtranspose_param = {'stride': self.stride, 'pad': self.pad}
input_ = np.random.random(self.input_size).astype("float32")
filter_ = np.random.random(self.filter_size).astype("float32")
output = conv3dtranspose_forward_naive(
input_, filter_, conv3dtranspose_param).astype("float32")
self.inputs = {'Input': input_, 'Filter': filter_}
self.attrs = {
'strides': self.stride,
'paddings': self.pad,
# 'dilations': self.dilations
'dilations': self.dilations
}
output = conv3dtranspose_forward_naive(input_, filter_,
self.attrs).astype("float32")
self.outputs = {'Output': output}
def test_check_output(self):
......@@ -108,11 +115,60 @@ class TestWithStride(TestConv3dTransposeOp):
self.filter_size = [f_c, 6, 3, 3, 3]
class TestWithDilation(TestConv3dTransposeOp):
def init_test_case(self):
self.pad = [1, 1, 1]
self.stride = [1, 1, 1]
self.dilations = [2, 2, 2]
self.input_size = [2, 3, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
# ------------ test_cudnn ------------
class TestCudnn(TestConv3dTransposeOp):
def init_op_type(self):
self.op_type = "conv3d_transpose_cudnn"
class TestCudnnWithPad(TestWithPad):
def init_test_case(self):
self.pad = [1, 1, 1]
self.stride = [1, 1, 1]
self.dilations = [1, 1, 1]
self.input_size = [2, 3, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
def init_op_type(self):
self.op_type = "conv3d_transpose_cudnn"
class TestCudnnWithStride(TestWithStride):
def init_test_case(self):
self.pad = [1, 1, 1]
self.stride = [2, 2, 2]
self.dilations = [1, 1, 1]
self.input_size = [2, 3, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
def init_op_type(self):
self.op_type = "conv3d_transpose_cudnn"
# #cudnn v5 does not support dilation conv.
# class TestCudnnWithDilation(TestWithDilation):
# def init_test_case(self):
# self.pad = [1, 1, 1]
# self.stride = [2, 2, 2]
# self.dilations = [2, 2, 2]
# self.input_size = [2, 3, 5, 5, 5] # NCDHW
# f_c = self.input_size[1]
# self.filter_size = [f_c, 6, 3, 3, 3]
#
# def init_op_type(self):
# self.op_type = "conv3d_transpose_cudnn"
if __name__ == '__main__':
unittest.main()
......@@ -47,7 +47,9 @@ class TestDropoutOp4(OpTest):
self.op_type = "dropout"
self.inputs = {'X': np.random.random((32, 64)).astype("float32")}
self.attrs = {'dropout_prob': 0.35, 'is_test': True}
self.outputs = {'Out': self.inputs['X'] * self.attrs['dropout_prob']}
self.outputs = {
'Out': self.inputs['X'] * (1.0 - self.attrs['dropout_prob'])
}
def test_check_output(self):
self.check_output()
......@@ -58,7 +60,9 @@ class TestDropoutOp5(OpTest):
self.op_type = "dropout"
self.inputs = {'X': np.random.random((32, 64, 3)).astype("float32")}
self.attrs = {'dropout_prob': 0.75, 'is_test': True}
self.outputs = {'Out': self.inputs['X'] * self.attrs['dropout_prob']}
self.outputs = {
'Out': self.inputs['X'] * (1.0 - self.attrs['dropout_prob'])
}
def test_check_output(self):
self.check_output()
......
......@@ -63,8 +63,7 @@ class TestDynRNN(unittest.TestCase):
all_timesteps = fluid.layers.array_to_lod_tensor(
x=out, table=rank_table)
last = fluid.layers.sequence_pool(
input=all_timesteps, pool_type='last')
last = fluid.layers.sequence_last_step(input=all_timesteps)
logits = fluid.layers.fc(input=last, size=1, act=None)
loss = fluid.layers.sigmoid_cross_entropy_with_logits(
x=logits, label=label)
......@@ -101,7 +100,7 @@ class TestDynRNN(unittest.TestCase):
rnn.update_memory(mem, out_)
rnn.output(out_)
last = fluid.layers.sequence_pool(input=rnn(), pool_type='last')
last = fluid.layers.sequence_last_step(input=rnn())
logits = fluid.layers.fc(input=last, size=1, act=None)
label = fluid.layers.data(name='label', shape=[1], dtype='float32')
loss = fluid.layers.sigmoid_cross_entropy_with_logits(
......
......@@ -7,7 +7,7 @@ class TestFillZerosLikeOp(OpTest):
def setUp(self):
self.op_type = "fill_zeros_like"
self.inputs = {'X': np.random.random((219, 232)).astype("float32")}
self.outputs = {'Y': np.zeros_like(self.inputs["X"])}
self.outputs = {'Out': np.zeros_like(self.inputs["X"])}
def test_check_output(self):
self.check_output()
......
import unittest
import numpy
import paddle.v2.fluid as fluid
import paddle.v2.fluid.core as core
from paddle.v2.fluid.op import Operator
import numpy
from paddle.v2.fluid.executor import Executor
class TestGaussianRandomOp(unittest.TestCase):
def setUp(self):
self.op_type = "gaussian_random"
self.inputs = {}
self.attrs = {"shape": [1000, 784], "mean": .0, "std": 1., "seed": 10}
self.outputs = ["Out"]
def test_cpu(self):
self.gaussian_random_test(place=core.CPUPlace())
self.gaussian_random_test(place=fluid.CPUPlace())
def test_gpu(self):
if core.is_compile_gpu():
self.gaussian_random_test(place=core.GPUPlace(0))
self.gaussian_random_test(place=fluid.GPUPlace(0))
def gaussian_random_test(self, place):
scope = core.Scope()
scope.var('Out').get_tensor()
op = Operator(
"gaussian_random",
Out='Out',
shape=[1000, 784],
mean=.0,
std=1.,
seed=10)
context = core.DeviceContext.create(place)
op.run(scope, context)
tensor = numpy.array(scope.find_var('Out').get_tensor())
program = fluid.Program()
block = program.global_block()
vout = block.create_var(name="Out")
op = block.append_op(
type=self.op_type, outputs={"Out": vout}, attrs=self.attrs)
op.desc.infer_var_type(block.desc)
op.desc.infer_shape(block.desc)
fetch_list = []
for var_name in self.outputs:
fetch_list.append(block.var(var_name))
exe = Executor(place)
outs = exe.run(program, fetch_list=fetch_list)
tensor = outs[0]
self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1)
self.assertAlmostEqual(numpy.std(tensor), 1., delta=0.1)
......
......@@ -27,7 +27,7 @@ class TestOptimizer(unittest.TestCase):
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01)
opts = sgd_optimizer.minimize(mean_out, init_program)
opts, _ = sgd_optimizer.minimize(mean_out, init_program)
self.assertEqual(len(opts), 1)
sgd_op = opts[0]
self.assertEqual(sgd_op.type, "sgd")
......@@ -57,7 +57,7 @@ class TestOptimizer(unittest.TestCase):
learning_rate = 0.01
sgd_optimizer = optimizer.SGDOptimizer(
learning_rate=learning_rate, global_step=global_step)
opts = sgd_optimizer.minimize(mean_out, init_program)
opts, _ = sgd_optimizer.minimize(mean_out, init_program)
self.assertEqual(len(opts), 2)
sgd_op = opts[0]
self.assertEqual(sgd_op.type, "sgd")
......
import unittest
import paddle.v2.fluid as fluid
import numpy
class TestReorderLoDTensor(unittest.TestCase):
def test_reorder(self):
dat = fluid.layers.data(name='input', shape=[1], lod_level=2)
dat.stop_gradient = False
rank_dat = fluid.layers.data(name='ref', shape=[1], lod_level=1)
table = fluid.layers.lod_rank_table(rank_dat)
new_dat = fluid.layers.reorder_lod_tensor_by_rank(
x=dat, rank_table=table)
loss = fluid.layers.mean(x=new_dat)
fluid.backward.append_backward_ops(loss=loss)
cpu = fluid.CPUPlace()
exe = fluid.Executor(cpu)
exe.run(fluid.default_startup_program())
ref = fluid.Tensor()
ref_lod = [0, 3, 4, 7, 8, 14]
ref.set_lod([ref_lod])
ref.set(numpy.random.random(size=[14, 1]).astype('float32'), cpu)
input = fluid.Tensor()
lod_level_0 = numpy.random.randint(low=1, high=5, size=5)
lod_level_0 = [0] + numpy.cumsum(lod_level_0).tolist()
lod_level_1 = numpy.random.randint(low=1, high=5, size=lod_level_0[-1])
lod_level_1 = [0] + numpy.cumsum(lod_level_1).tolist()
input.set_lod([lod_level_0, lod_level_1])
input.set(
numpy.random.random(size=[lod_level_1[-1], 1]).astype('float32'),
cpu)
ig = exe.run(fluid.default_main_program(),
feed={'input': input,
'ref': ref},
fetch_list=['input@GRAD'],
return_numpy=False)[0]
self.assertAlmostEqual(numpy.array(ig).sum(), 1.0, delta=0.001)
self.assertEqual(input.lod(), ig.lod())
if __name__ == '__main__':
unittest.main()
import unittest
import numpy
from paddle.v2.fluid.op import Operator
import paddle.v2.fluid.core as core
import numpy
import paddle.v2.fluid as fluid
class TestUniformRandomOp(unittest.TestCase):
def test_uniform_random_cpu(self):
def setUp(self):
self.op_type = "uniform_random"
self.inputs = {}
self.attrs = {
"shape": [1000, 784],
"min": -5.0,
"max": 10.0,
"seed": 10
}
self.outputs = ["Out"]
def test_cpu(self):
self.uniform_random_test(place=core.CPUPlace())
def test_uniform_random_gpu(self):
def test_gpu(self):
if core.is_compile_gpu():
self.uniform_random_test(place=core.GPUPlace(0))
def uniform_random_test(self, place):
scope = core.Scope()
scope.var('X').get_tensor()
op = Operator(
"uniform_random",
Out='X',
shape=[1000, 784],
min=-5.0,
max=10.0,
seed=10)
ctx = core.DeviceContext.create(place)
op.run(scope, ctx)
tensor = numpy.array(scope.find_var('X').get_tensor())
context = core.DeviceContext.create(place)
program = fluid.Program()
block = program.global_block()
vout = block.create_var(name="Out")
op = block.append_op(
type=self.op_type, outputs={"Out": vout}, attrs=self.attrs)
op.desc.infer_var_type(block.desc)
op.desc.infer_shape(block.desc)
fetch_list = []
for var_name in self.outputs:
fetch_list.append(block.var(var_name))
exe = fluid.Executor(place)
outs = exe.run(program, fetch_list=fetch_list)
tensor = outs[0]
self.assertAlmostEqual(tensor.mean(), 2.5, delta=0.1)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册