diff --git a/benchmark/paddle/image/alexnet.py b/benchmark/paddle/image/alexnet.py index 3358d43a4b08c6a9b89d59e1a8be53ee1f12bbe0..77d130ae34059d1e87040d00346ac1dadd86b0d8 100644 --- a/benchmark/paddle/image/alexnet.py +++ b/benchmark/paddle/image/alexnet.py @@ -6,8 +6,18 @@ height = 227 width = 227 num_class = 1000 batch_size = get_config_arg('batch_size', int, 128) +gp = get_config_arg('layer_num', int, 1) +is_infer = get_config_arg("is_infer", bool, False) +num_samples = get_config_arg('num_samples', int, 2560) -args = {'height': height, 'width': width, 'color': True, 'num_class': num_class} +args = { + 'height': height, + 'width': width, + 'color': True, + 'num_class': num_class, + 'is_infer': is_infer, + 'num_samples': num_samples +} define_py_data_sources2( "train.list", None, module="provider", obj="process", args=args) @@ -31,7 +41,7 @@ net = img_pool_layer(input=net, pool_size=3, stride=2) # conv2 net = img_conv_layer( - input=net, filter_size=5, num_filters=256, stride=1, padding=2, groups=1) + input=net, filter_size=5, num_filters=256, stride=1, padding=2, groups=gp) net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75) net = img_pool_layer(input=net, pool_size=3, stride=2) @@ -40,11 +50,11 @@ net = img_conv_layer( input=net, filter_size=3, num_filters=384, stride=1, padding=1) # conv4 net = img_conv_layer( - input=net, filter_size=3, num_filters=384, stride=1, padding=1, groups=1) + input=net, filter_size=3, num_filters=384, stride=1, padding=1, groups=gp) # conv5 net = img_conv_layer( - input=net, filter_size=3, num_filters=256, stride=1, padding=1, groups=1) + input=net, filter_size=3, num_filters=256, stride=1, padding=1, groups=gp) net = img_pool_layer(input=net, pool_size=3, stride=2) net = fc_layer( @@ -59,6 +69,9 @@ net = fc_layer( layer_attr=ExtraAttr(drop_rate=0.5)) net = fc_layer(input=net, size=1000, act=SoftmaxActivation()) -lab = data_layer('label', num_class) -loss = cross_entropy(input=net, label=lab) -outputs(loss) +if is_infer: + outputs(net) +else: + lab = data_layer('label', num_class) + loss = cross_entropy(input=net, label=lab) + outputs(loss) diff --git a/benchmark/paddle/image/googlenet.py b/benchmark/paddle/image/googlenet.py index 7059c13bd2c2b98eb3fbcf633a6f7064e54d5402..2a850ccb7f2c75b467554181fc5f4aa8f2b97a09 100644 --- a/benchmark/paddle/image/googlenet.py +++ b/benchmark/paddle/image/googlenet.py @@ -7,13 +7,15 @@ num_class = 1000 batch_size = get_config_arg('batch_size', int, 128) use_gpu = get_config_arg('use_gpu', bool, True) is_infer = get_config_arg("is_infer", bool, False) +num_samples = get_config_arg('num_samples', int, 2560) args = { 'height': height, 'width': width, 'color': True, 'num_class': num_class, - 'is_infer': is_infer + 'is_infer': is_infer, + 'num_samples': num_samples } define_py_data_sources2( "train.list" if not is_infer else None, diff --git a/benchmark/paddle/image/provider.py b/benchmark/paddle/image/provider.py index 927b1759941f362ef4b5ffe84dd01332986d9306..1018ec9ce1e529f618ddd7b7afa72a84c5e876a1 100644 --- a/benchmark/paddle/image/provider.py +++ b/benchmark/paddle/image/provider.py @@ -14,6 +14,7 @@ def initHook(settings, height, width, color, num_class, **kwargs): else: settings.data_size = settings.height * settings.width settings.is_infer = kwargs.get('is_infer', False) + settings.num_samples = kwargs.get('num_samples', 2560) if settings.is_infer: settings.slots = [dense_vector(settings.data_size)] else: @@ -23,7 +24,7 @@ def initHook(settings, height, width, color, num_class, **kwargs): @provider( init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM) def process(settings, file_list): - for i in xrange(2560 if settings.is_infer else 1024): + for i in xrange(settings.num_samples): img = np.random.rand(1, settings.data_size).reshape(-1, 1).flatten() if settings.is_infer: yield img.astype('float32') diff --git a/benchmark/paddle/image/resnet.py b/benchmark/paddle/image/resnet.py index 4a14363ff1db48a5072cbb5f5eb3bc9241ffca8f..2846e4763f1cda4602f03af5ec649d57ee6cf0d8 100644 --- a/benchmark/paddle/image/resnet.py +++ b/benchmark/paddle/image/resnet.py @@ -7,13 +7,15 @@ num_class = 1000 batch_size = get_config_arg('batch_size', int, 64) layer_num = get_config_arg("layer_num", int, 50) is_infer = get_config_arg("is_infer", bool, False) +num_samples = get_config_arg('num_samples', int, 2560) args = { 'height': height, 'width': width, 'color': True, 'num_class': num_class, - 'is_infer': is_infer + 'is_infer': is_infer, + 'num_samples': num_samples } define_py_data_sources2( "train.list" if not is_infer else None, diff --git a/benchmark/paddle/image/run_mkl_infer.sh b/benchmark/paddle/image/run_mkl_infer.sh index d795bcab1b7d098295066f79189d17e8299d28fb..62c9bf6efd3810f506fd4592b2ba3a21b1b7f0e7 100755 --- a/benchmark/paddle/image/run_mkl_infer.sh +++ b/benchmark/paddle/image/run_mkl_infer.sh @@ -37,7 +37,7 @@ function infer() { --trainer_count=1 \ --num_passes=1 \ --save_dir="models/${topology}-${layer_num}" \ - --config_args="batch_size=128,layer_num=${layer_num}" \ + --config_args="batch_size=128,layer_num=${layer_num},num_samples=256" \ > /dev/null 2>&1 echo "Done" fi @@ -79,8 +79,9 @@ fi # inference benchmark for use_mkldnn in True False; do for batchsize in 1 2 4 8 16; do - infer googlenet v1 $batchsize $use_mkldnn - infer resnet 50 $batchsize $use_mkldnn infer vgg 19 $batchsize $use_mkldnn + infer resnet 50 $batchsize $use_mkldnn + infer googlenet v1 $batchsize $use_mkldnn + infer alexnet 2 $batchsize $use_mkldnn done done diff --git a/benchmark/paddle/image/run_mkl_train.sh b/benchmark/paddle/image/run_mkl_train.sh index 5335af5ac1b9a4a48ec107b8b6386b50ead8284c..03d2d378fb72e36f765d89af788f6ee96fe21d4e 100755 --- a/benchmark/paddle/image/run_mkl_train.sh +++ b/benchmark/paddle/image/run_mkl_train.sh @@ -47,5 +47,6 @@ for use_mkldnn in True False; do train vgg 19 $batchsize $use_mkldnn train resnet 50 $batchsize $use_mkldnn train googlenet v1 $batchsize $use_mkldnn + train alexnet 2 $batchsize $use_mkldnn done done diff --git a/benchmark/paddle/image/run_openblas_infer.sh b/benchmark/paddle/image/run_openblas_infer.sh index c1001d3a7c95a293d0b2b5b78fb7415e167b3e9f..da034f3b9dff794e22086a5295ad2b0c2361c356 100755 --- a/benchmark/paddle/image/run_openblas_infer.sh +++ b/benchmark/paddle/image/run_openblas_infer.sh @@ -23,24 +23,25 @@ function infer() { echo "./run_mkl_infer.sh to save the model first" exit 0 fi - log_period=$((256 / bs)) + log_period=$((32 / bs)) paddle train --job=test \ --config="${topology}.py" \ + --use_mkldnn=False \ --use_gpu=False \ --trainer_count=$thread \ --log_period=$log_period \ - --config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True" \ + --config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True,num_samples=256" \ --init_model_path=$models_in \ 2>&1 | tee ${log} - # calculate the last 5 logs period time of 1280 samples, + # calculate the last 5 logs period time of 160(=32*5) samples, # the time before are burning time. start=`tail ${log} -n 7 | head -n 1 | awk -F ' ' '{print $2}' | xargs` end=`tail ${log} -n 2 | head -n 1 | awk -F ' ' '{print $2}' | xargs` start_sec=`clock_to_seconds $start` end_sec=`clock_to_seconds $end` - fps=`awk 'BEGIN{printf "%.2f",(1280 / ('$end_sec' - '$start_sec'))}'` - echo "Last 1280 samples start: ${start}(${start_sec} sec), end: ${end}(${end_sec} sec;" >> ${log} + fps=`awk 'BEGIN{printf "%.2f",(160 / ('$end_sec' - '$start_sec'))}'` + echo "Last 160 samples start: ${start}(${start_sec} sec), end: ${end}(${end_sec} sec;" >> ${log} echo "FPS: $fps images/sec" 2>&1 | tee -a ${log} } @@ -56,7 +57,8 @@ fi # inference benchmark for batchsize in 1 2 4 8 16; do - infer googlenet v1 $batchsize - infer resnet 50 $batchsize infer vgg 19 $batchsize + infer resnet 50 $batchsize + infer googlenet v1 $batchsize + infer alexnet 2 $batchsize done diff --git a/benchmark/paddle/image/run_openblas_train.sh b/benchmark/paddle/image/run_openblas_train.sh index b9494ce119523953a3360b2b67e2cb6f3e0f1643..e9df83fee2a3f796b7234b39619364f6ee4d5dc9 100755 --- a/benchmark/paddle/image/run_openblas_train.sh +++ b/benchmark/paddle/image/run_openblas_train.sh @@ -12,10 +12,11 @@ function train() { config="${topology}.py" paddle train --job=time \ --config=$config \ + --use_mkldnn=False \ --use_gpu=False \ --trainer_count=$thread \ - --log_period=10 \ - --test_period=100 \ + --log_period=3 \ + --test_period=30 \ --config_args=$args \ 2>&1 | tee ${log} @@ -36,4 +37,5 @@ for batchsize in 64 128 256; do train vgg 19 $batchsize train resnet 50 $batchsize train googlenet v1 $batchsize + train alexnet 2 $batchsize done diff --git a/benchmark/paddle/image/vgg.py b/benchmark/paddle/image/vgg.py index 8d0a1e97a451cd52ef17e4e326673cc90059ef3c..ca0a6798fb8c35b68cf84d263855955eb93ba0b0 100644 --- a/benchmark/paddle/image/vgg.py +++ b/benchmark/paddle/image/vgg.py @@ -7,13 +7,15 @@ num_class = 1000 batch_size = get_config_arg('batch_size', int, 64) layer_num = get_config_arg('layer_num', int, 19) is_infer = get_config_arg("is_infer", bool, False) +num_samples = get_config_arg('num_samples', int, 2560) args = { 'height': height, 'width': width, 'color': True, 'num_class': num_class, - 'is_infer': is_infer + 'is_infer': is_infer, + 'num_samples': num_samples } define_py_data_sources2( "train.list" if not is_infer else None, diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index fab2af362bb070a54987b6499748056f3d12a56b..ff5855052dabaa0b63099cd219f3f04e22f1aa85 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -253,9 +253,9 @@ IF(NOT PROTOBUF_FOUND) IF(WITH_C_API) INSTALL(DIRECTORY ${PROTOBUF_INCLUDE_DIR} DESTINATION third_party/protobuf) IF(ANDROID) - INSTALL(FILES ${PROTOBUF_LIBRARY} DESTINATION third_party/protobuf/lib/${ANDROID_ABI}) + INSTALL(FILES ${PROTOBUF_LITE_LIBRARY} DESTINATION third_party/protobuf/lib/${ANDROID_ABI}) ELSE() - INSTALL(FILES ${PROTOBUF_LIBRARY} DESTINATION third_party/protobuf/lib) + INSTALL(FILES ${PROTOBUF_LITE_LIBRARY} DESTINATION third_party/protobuf/lib) ENDIF() ENDIF() diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index c3f9c18d0663a7a24880b441981875c1e4f015aa..d81481ca819c13ee0e299c204f998f3915c34bd4 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -467,7 +467,7 @@ lambda_cost :noindex: square_error_cost --------- +----------------- .. autoclass:: paddle.v2.layer.square_error_cost :noindex: @@ -533,7 +533,7 @@ Miscs ===== dropout --------------- +-------- .. autoclass:: paddle.v2.layer.dropout :noindex: diff --git a/doc/api/v2/fluid/layers.rst b/doc/api/v2/fluid/layers.rst index 842f3b18007a55fb538fbe5d5fefc3f4b75ebe14..b25009310c96c22453b2931f25ca6c1942c0bc0b 100644 --- a/doc/api/v2/fluid/layers.rst +++ b/doc/api/v2/fluid/layers.rst @@ -19,17 +19,17 @@ dynamic_lstm :noindex: data ---------- +---- .. autofunction:: paddle.v2.fluid.layers.data :noindex: mean ---------- +---- .. autofunction:: paddle.v2.fluid.layers.mean :noindex: mul ---------- +--- .. autofunction:: paddle.v2.fluid.layers.mul :noindex: @@ -45,13 +45,13 @@ elementwise_div dropout ---------- +------- .. autofunction:: paddle.v2.fluid.layers.dropout :noindex: reshape ---------- +-------- .. autofunction:: paddle.v2.fluid.layers.reshape :noindex: @@ -81,67 +81,67 @@ transpose sigmoid_cross_entropy_with_logits ---------- +--------------------------------- .. autofunction:: paddle.v2.fluid.layers.esigmoid_cross_entropy_with_logits :noindex: cast ---------- +---- .. autofunction:: paddle.v2.fluid.layers.cast :noindex: concat ---------- +------- .. autofunction:: paddle.v2.fluid.layers.concat :noindex: sums ---------- +---- .. autofunction:: paddle.v2.fluid.layers.sums :noindex: linear_chain_crf ---------- +---------------- .. autofunction:: paddle.v2.fluid.layers.linear_chain_crf :noindex: assign ---------- +------- .. autofunction:: paddle.v2.fluid.layers.embedding :noindex: split_lod_tensor ---------- +---------------- .. autofunction:: paddle.v2.fluid.layers.split_lod_tensor :noindex: merge_lod_tensor ---------- +---------------- .. autofunction:: paddle.v2.fluid.layers.merge_lod_tensor :noindex: cos_sim ---------- +-------- .. autofunction:: paddle.v2.fluid.layers.cos_sim :noindex: cross_entropy ---------- +------------- .. autofunction:: paddle.v2.fluid.layers.cross_entropy :noindex: square_error_cost ---------- +----------------- .. autofunction:: paddle.v2.fluid.layers.square_error_cost :noindex: @@ -153,68 +153,68 @@ accuracy sequence_conv ---------- +------------- .. autofunction:: paddle.v2.fluid.layers.sequence_conv :noindex: conv2d ---------- +------ .. autofunction:: paddle.v2.fluid.layers.conv2d :noindex: sequence_pool ---------- +------------- .. autofunction:: paddle.v2.fluid.layers.sequence_pool :noindex: pool2d ---------- +------ .. autofunction:: paddle.v2.fluid.layers.pool2d :noindex: batch_norm ---------- +---------- .. autofunction:: paddle.v2.fluid.layers.batch_norm :noindex: beam_search_decode ---------- +------------------ .. autofunction:: paddle.v2.fluid.layers.beam_search_decode :noindex: lod_rank_table ---------- +-------------- .. autofunction:: paddle.v2.fluid.layers.lod_rank_table :noindex: max_sequence_len ---------- +---------------- .. autofunction:: paddle.v2.fluid.layers.max_sequence_len :noindex: topk ---------- +----- .. autofunction:: paddle.v2.fluid.layers.topk :noindex: lod_tensor_to_array ---------- +------------------- .. autofunction:: paddle.v2.fluid.layers.lod_tensor_to_array :noindex: array_to_lod_tensor ---------- +------------------- .. autofunction:: paddle.v2.fluid.layers.array_to_lod_tensor :noindex: @@ -222,26 +222,26 @@ array_to_lod_tensor fill_constant ---------- +------------- .. autofunction:: paddle.v2.fluid.layers.fill_constant :noindex: fill_constant_batch_size_like ---------- +----------------------------- .. autofunction:: paddle.v2.fluid.layers.fill_constant_batch_size_like :noindex: ones ---------- +---- .. autofunction:: paddle.v2.fluid.layers.ones :noindex: zeros ---------- +----- .. autofunction:: paddle.v2.fluid.layers.zeros :noindex: @@ -253,14 +253,14 @@ increment array_write ---------- +----------- .. autofunction:: paddle.v2.fluid.layers.array_write :noindex: create_array ---------- +------------ .. autofunction:: paddle.v2.fluid.layers.create_array :noindex: @@ -272,31 +272,31 @@ less_than array_read ---------- +---------- .. autofunction:: paddle.v2.fluid.layers.array_read :noindex: shrink_memory ---------- +-------------- .. autofunction:: paddle.v2.fluid.layers.shrink_memory :noindex: array_length ---------- +------------- .. autofunction:: paddle.v2.fluid.layers.array_length :noindex: conv2d_transpose ---------- +---------------- .. autofunction:: paddle.v2.fluid.layers.conv2d_transpose :noindex: sequence_expand ---------- +--------------- .. autofunction:: paddle.v2.fluid.layers.sequence_expand :noindex: @@ -308,13 +308,13 @@ lstm_unit sequence_softmax ---------- +---------------- .. autofunction:: paddle.v2.fluid.layers.sequence_softmax :noindex: reduce_sum ---------- +---------- .. autofunction:: paddle.v2.fluid.layers.reduce_sum :noindex: diff --git a/doc/api/v2/fluid/nets.rst b/doc/api/v2/fluid/nets.rst index 2c3d075422de29c96e25458e831133a30270dd39..b792efb71f85ae643df655568da69c82414e9d5d 100644 --- a/doc/api/v2/fluid/nets.rst +++ b/doc/api/v2/fluid/nets.rst @@ -3,19 +3,19 @@ Nets =========== simple_img_conv_pool ------------ +-------------------- .. autofunction:: paddle.v2.fluid.nets.simple_img_conv_pool :noindex: img_conv_group ------------ +--------------- .. autofunction:: paddle.v2.fluid.nets.img_conv_group :noindex: sequence_conv_pool ------------ +------------------ .. autofunction:: paddle.v2.fluid.nets.sequence_conv_pool :noindex: diff --git a/doc/api/v2/fluid/optimizer.rst b/doc/api/v2/fluid/optimizer.rst index 233762fcdfb39e592740adef6721a556fae3feef..19b4940f08de3e2f7dc177f2961e538946d10a78 100644 --- a/doc/api/v2/fluid/optimizer.rst +++ b/doc/api/v2/fluid/optimizer.rst @@ -18,7 +18,7 @@ SGDOptimizer MomentumOptimizer ------------ +----------------- .. automodule:: paddle.v2.fluid.optimizer :members: MomentumOptimizer :noindex: @@ -26,14 +26,14 @@ MomentumOptimizer AdagradOptimizer ------------ +---------------- .. automodule:: paddle.v2.fluid.optimizer :members: AdagradOptimizer :noindex: AdamOptimizer ------------ +------------- .. automodule:: paddle.v2.fluid.optimizer :members: AdamOptimizer :noindex: @@ -47,7 +47,7 @@ AdamaxOptimizer DecayedAdagradOptimizer ------------ +----------------------- .. automodule:: paddle.v2.fluid.optimizer :members: DecayedAdagradOptimizer :noindex: diff --git a/doc/api/v2/fluid/regularizer.rst b/doc/api/v2/fluid/regularizer.rst index 3af2b07d2ae55d99df705fbf1ad2402eee05c435..868e225ed3d59e79aeb217fb88081ea25f80fa2c 100644 --- a/doc/api/v2/fluid/regularizer.rst +++ b/doc/api/v2/fluid/regularizer.rst @@ -3,14 +3,14 @@ Regularizer =========== WeightDecayRegularizer ------------ +---------------------- .. automodule:: paddle.v2.fluid.regularizer :members: WeightDecayRegularizer :noindex: L2DecayRegularizer ------------ +------------------ .. automodule:: paddle.v2.fluid.regularizer :members: L2DecayRegularizer :noindex: @@ -18,7 +18,7 @@ L2DecayRegularizer L1DecayRegularizer ------------ +------------------- .. automodule:: paddle.v2.fluid.regularizer :members: L1DecayRegularizer diff --git a/doc/design/operator_kernel_type.md b/doc/design/operator_kernel_type.md new file mode 100644 index 0000000000000000000000000000000000000000..aa82e96bf79319f1a57e2ad58aa9826e57be6470 --- /dev/null +++ b/doc/design/operator_kernel_type.md @@ -0,0 +1,91 @@ +# Design Doc: The Keys of Operator Kernel Type +## Problem +An operator can have different kernel implementations, and each operator will have a map to store the related kernels. Fluid uses `OpKernelType` as a key to identify a unique Kernel. Before an operator runs, an certain kernel must be chosen by a key of `OpKernelType`. Currently, `OpKernelType` is defined as follows: + +```cpp +struct OpKernelType { + platform::Place place_; + proto::DataType data_type_; +}; +``` +For more details, please refer to [codes](https://github.com/PaddlePaddle/Paddle/blob/2d5ec16bc8a09fb8e0f62c89b116b0cd1d333907/paddle/framework/operator.h#L348-L374) in github. + +It contains two keys, `Place` and `DataType`. And these two keys will be hashed to a unique key to represent a certain type of kernel. However, these two keys are not enough. We need a more complete representation of `OpKernelType`. + +We often implement a kernel of an operator with some computing library in certain device(place). Please remind that computing library and device are not one-to-one corresponding. A device can have a lot of computing libraries and a computing library can also support several devices. + +For example, Eigen library can support Nvidia GPU/AMD GPU/CPU. And MKLDNN library can support Intel CPU/Intel FPGA. Both `Place` and `Library` should be a key of `OpKernelType`. + +It's obvious that different DataTypes, like fp64/fp32/int8 will have different kernels. But the data layout of a Tensor will also lead to different implementation. Please refer to the batch norm operator [kernels](https://github.com/PaddlePaddle/Paddle/blob/a948fac4d0ad7e0412d373b8aabeb711c2899563/paddle/operators/batch_norm_op.cc#L180-L209). Data Layout should also be taken into consideration. + +## Solution + +There are four keys to determine a kernel type of an operator: `Place`/`Library`/`DataType`/`Layout`. + +```cpp +struct OpKernelType { + platform::Place place_; + platform::Library library_; + proto::DataType data_type_; + framework::Layout layout_; +}; +``` + +Following is the details: + +### Place + +`Place` is defined as follows: + +```cpp +typedef boost::variant Place; +``` + +`Place` is to represent the device memory where data is locating. + + +### Library + +One operator kernel is usually implemented based on one library. `Library` is defined as a enum variable: + +```cpp +enum Library { Plain, MKLDNN, CUDNN }; +``` + +We use `Plain` enumerator to represent default library. Since most operators in Fluid are implemented based on `Eigen` library, we take `Eigen` library as the `Plain` enumerator. +A library usually has a corresponding `DeviceContext` which contains some handles needed by computation. Fluid now have two default DeviceContexts in CPU and CUDA, `CPUDeviceContext` and `CUDADeviceContext`. `CPUDeviceContext` contains a Eigen library handle and `CDUADeviceContext` contains a Eigen library handle and cuBLAS handle. + +If we want to support new Library, a new enumerator need to be added to `Library` and a new corresponding `LibraryDeviceContext` will be created. + + +### DataType + + +`DataType` is defined in [framework.proto](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto). Currently, int32/int64/fp32/fp64 are supported. + +### Layout + +Actually, a Tensor is a view of a block of memory. Besides a pointer to the memory, we also have to get some other descriptions of this block of memory, such as shape(ddim), stride, and layout. + +Different layout leads to different implementation of operator kernel. There are mainly 4 principles we have to follow to support layout in our fluid framework. + +- We take layout as a data member of Tensor. Layout is actually a enum variable. If fluid is built with MKLDNN, then, the memory format in MKLDNN will be added into this enum variable too. + +- Users have to set layout for input data. And some operators like fill_constant/random, also have to set layout of generating data. Of course, we can have some default layout, like NCHW. + +- The inference of Layout is at run-time, not compile-time. + +- Every operator have to implement different kernels for different layouts. Let's take MKLDNN as an example, if we want to implement a MKLDNN convolution operator, we have to realize all the kernels for different layout, list at [here](http://01org.github.io/mkl-dnn/structmkldnn_1_1memory.html). And we will have a special macro to do registering kernels for MKLDNN operators. + +`Layout` is also defined as a enum variable: + +```cpp +enum Layout { + kNCHW, + kNHWC, +#ifdef PADDLE_WITH_MKLDNN + knChw8c + ... +#endif +}; +``` diff --git a/doc/getstarted/build_and_install/pip_install_cn.rst b/doc/getstarted/build_and_install/pip_install_cn.rst index b270e2c2f0b0cbfd6fb4b9b0750d207952f84d76..a4587f82a984acf243f49834e707fcd66d5b1252 100644 --- a/doc/getstarted/build_and_install/pip_install_cn.rst +++ b/doc/getstarted/build_and_install/pip_install_cn.rst @@ -37,11 +37,11 @@ PaddlePaddle可以使用常用的Python包管理工具 :header: "版本说明", "cp27-cp27mu", "cp27-cp27m", "C-API" :widths: 1, 3, 3, 3 - "cpu_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" - "cpu_avx_openblas", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "暂无" - "cuda7.5_cudnn5_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" - "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" - "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" + "cpu_avx_mkl", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" + "cpu_avx_openblas", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl `_", "暂无" + "cuda7.5_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" + "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" + "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" .. _pip_dependency: diff --git a/doc/getstarted/build_and_install/pip_install_en.rst b/doc/getstarted/build_and_install/pip_install_en.rst index 70f601a11c610e0a2b5dcc8b73d2c3ea19e195e1..55e31560a0f5087ab69966a6281c6c8573c04204 100644 --- a/doc/getstarted/build_and_install/pip_install_en.rst +++ b/doc/getstarted/build_and_install/pip_install_en.rst @@ -40,11 +40,11 @@ If the links below shows up the login form, just click "Log in as guest" to star :header: "version", "cp27-cp27mu", "cp27-cp27m", "C-API" :widths: 1, 3, 3, 3 - "cpu_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" - "cpu_avx_openblas", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "Not Available" - "cuda7.5_cudnn5_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" - "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" - "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" + "cpu_avx_mkl", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" + "cpu_avx_openblas", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl `_", "Not Available" + "cuda7.5_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" + "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" + "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" .. _pip_dependency: diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index f1a577325f1b1ead6b1e08ee35a0ea30ce003bb6..222aee5974ca82c12a9c3b18f7fac4f96ee251bd 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -42,7 +42,7 @@ static std::unordered_set& CtrlFlowOps() { static inline std::unique_ptr CreateGradOp( const OperatorBase& op, const std::unordered_set& no_grad_set, std::unordered_map* grad_to_var) { - OpDescBind op_desc; + OpDesc op_desc; op_desc.SetInputMap(op.Inputs()); op_desc.SetOutputMap(op.Outputs()); op_desc.SetType(op.Type()); @@ -53,7 +53,7 @@ static inline std::unique_ptr CreateGradOp( grad_ops.reserve(grad_descs.size()); std::transform(grad_descs.begin(), grad_descs.end(), std::back_inserter(grad_ops), - [](const std::unique_ptr& grad_desc) { + [](const std::unique_ptr& grad_desc) { return OpRegistry::CreateOp(*grad_desc); }); PADDLE_ENFORCE(!grad_ops.empty()); @@ -217,7 +217,7 @@ static std::unique_ptr BackwardRecursive( // If part of input gradient of that operator is not calculated, fill // zero variables to that input gradient. net->AppendOp(OpRegistry::CreateOp("fill_zeros_like", {{"X", {prefix}}}, - {{"Y", {grad_input}}}, + {{"Out", {grad_input}}}, AttributeMap{})); } return false; @@ -296,7 +296,7 @@ static std::string FwdName(const std::string& grad_name) { static void CreateGradVarInBlock( size_t grad_op_start_index, const std::unordered_map& param_name_map, - BlockDescBind* block_desc, + BlockDesc* block_desc, std::unordered_map* grad_var_record) { auto ops = block_desc->AllOps(); for (size_t op_index = grad_op_start_index; op_index < ops.size(); @@ -350,12 +350,11 @@ static void CreateGradVarInBlock( } } -std::vector> MakeOpGrad( - const OpDescBind* op_desc, std::unordered_set* no_grad_vars, +std::vector> MakeOpGrad( + const OpDesc* op_desc, std::unordered_set* no_grad_vars, std::unordered_map* grad_to_var, - const std::vector& grad_block = - std::vector()) { - std::vector> grad_op_descs; + const std::vector& grad_block = std::vector()) { + std::vector> grad_op_descs; // All input gradients of forwarding operator do not need to calculate. const std::vector& inputs = op_desc->InputArgumentNames(); if (AllGradInSet(inputs, *no_grad_vars)) { @@ -386,7 +385,7 @@ std::vector> MakeOpGrad( .Get(op_desc->Type()) .GradOpMaker()(*op_desc, *no_grad_vars, grad_to_var, grad_block); - std::list> pending_fill_zeros_ops; + std::list> pending_fill_zeros_ops; for (auto& desc : grad_op_descs) { for (const std::string& in_name : desc->InputArgumentNames()) { if (no_grad_vars->count(in_name)) { @@ -394,9 +393,9 @@ std::vector> MakeOpGrad( 0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1); std::string new_name = prefix + kZeroVarSuffix; desc->Rename(in_name, new_name); - std::unique_ptr fill_zeros_op( - new OpDescBind("fill_zeros_like", {{"X", {prefix}}}, - {{"Y", {new_name}}}, AttributeMap{})); + std::unique_ptr fill_zeros_op( + new OpDesc("fill_zeros_like", {{"X", {prefix}}}, + {{"Out", {new_name}}}, AttributeMap{})); pending_fill_zeros_ops.push_back(std::move(fill_zeros_op)); } } @@ -408,34 +407,33 @@ std::vector> MakeOpGrad( return grad_op_descs; } -static BlockDescBind* CreateStepBlock( - ProgramDescBind& program_desc, - std::unordered_set* no_grad_vars, +static BlockDesc* CreateStepBlock( + ProgramDesc& program_desc, std::unordered_set* no_grad_vars, std::unordered_map* grad_to_var, int step_block_idx); -std::vector> MakeBlockBackward( - ProgramDescBind& program_desc, int block_idx, +std::vector> MakeBlockBackward( + ProgramDesc& program_desc, int block_idx, std::unordered_set* no_grad_vars, std::unordered_map* grad_to_var) { VLOG(5) << "MakeBlockBackward"; - BlockDescBind* cur_block = program_desc.MutableBlock(block_idx); - std::vector op_descs = cur_block->AllOps(); + BlockDesc* cur_block = program_desc.MutableBlock(block_idx); + std::vector op_descs = cur_block->AllOps(); std::unordered_map> dup_out_ops; size_t grad_desc_idx = 0; - std::vector> backward_descs; + std::vector> backward_descs; for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) { VLOG(5) << "Making backward " << (*it)->Type() << " op"; - std::vector> op_grads; + std::vector> op_grads; if ((*it)->Type() == "recurrent" || (*it)->Type() == "while") { int step_block_idx = (*it)->GetBlockAttr("sub_block"); - BlockDescBind* backward_block = CreateStepBlock( - program_desc, no_grad_vars, grad_to_var, step_block_idx); + BlockDesc* backward_block = CreateStepBlock(program_desc, no_grad_vars, + grad_to_var, step_block_idx); op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block}); } else if ((*it)->Type() == "conditional_block") { - BlockDescBind* backward_block = + BlockDesc* backward_block = CreateStepBlock(program_desc, no_grad_vars, grad_to_var, (*it)->GetBlockAttr("sub_block")); op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block}); @@ -463,14 +461,14 @@ std::vector> MakeBlockBackward( } ++grad_desc_idx; } - std::transform( - op_grads.begin(), op_grads.end(), std::back_inserter(backward_descs), - [](std::unique_ptr& ptr) { return std::move(ptr); }); + std::transform(op_grads.begin(), op_grads.end(), + std::back_inserter(backward_descs), + [](std::unique_ptr& ptr) { return std::move(ptr); }); } VLOG(5) << "Appending Sums"; // Check whether some variables are written more than once - std::list>> pending_sum_ops; + std::list>> pending_sum_ops; for (const auto& dup : dup_out_ops) { const std::string& out_name = dup.first; const std::vector dup_op = dup.second; @@ -486,18 +484,17 @@ std::vector> MakeBlockBackward( sum_op_inputs.emplace_back(new_name); next_g_name = sum_op_inputs.back(); } - std::unique_ptr sum_op( - new OpDescBind("sum", {{"X", sum_op_inputs}}, {{"Out", {out_name}}}, - AttributeMap{})); + std::unique_ptr sum_op(new OpDesc("sum", {{"X", sum_op_inputs}}, + {{"Out", {out_name}}}, + AttributeMap{})); pending_sum_ops.push_back({dup_op.back(), std::move(sum_op)}); } } - pending_sum_ops.sort( - [](const std::pair>& a, - const std::pair>& b) { - return a.first > b.first; - }); + pending_sum_ops.sort([](const std::pair>& a, + const std::pair>& b) { + return a.first > b.first; + }); for (auto& p : pending_sum_ops) { backward_descs.insert(backward_descs.begin() + p.first + 1, std::move(p.second)); @@ -508,14 +505,13 @@ std::vector> MakeBlockBackward( return backward_descs; } -static BlockDescBind* CreateStepBlock( - ProgramDescBind& program_desc, - std::unordered_set* no_grad_vars, +static BlockDesc* CreateStepBlock( + ProgramDesc& program_desc, std::unordered_set* no_grad_vars, std::unordered_map* grad_to_var, int step_block_idx) { auto backward_block_op_descs = MakeBlockBackward(program_desc, step_block_idx, no_grad_vars, grad_to_var); - BlockDescBind* backward_block = + BlockDesc* backward_block = program_desc.AppendBlock(*program_desc.MutableBlock(step_block_idx)); for (auto& ptr : backward_block_op_descs) { backward_block->AppendAllocatedOp(move(ptr)); @@ -524,7 +520,7 @@ static BlockDescBind* CreateStepBlock( } ParamGradInfoMap AppendBackward( - ProgramDescBind& program_desc, const VarDescBind& target, + ProgramDesc& program_desc, const VarDesc& target, const std::unordered_set& no_grad_vars) { std::unordered_set no_grad_var_names; no_grad_var_names.reserve(no_grad_vars.size() + 1); @@ -541,11 +537,11 @@ ParamGradInfoMap AppendBackward( PADDLE_ENFORCE(is_scalar, "target should be scalar"); VLOG(3) << "backward from loss=" << target.Name() << " data_type=" << target.GetDataType(); - std::unique_ptr fill_one_op( - new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}}, - {{"shape", std::vector{1}}, - {"value", static_cast(1.0)}, - {"dtype", target.GetDataType()}})); + std::unique_ptr fill_one_op( + new OpDesc("fill_constant", {}, {{"Out", {fill_one_op_out}}}, + {{"shape", std::vector{1}}, + {"value", static_cast(1.0)}, + {"dtype", target.GetDataType()}})); // infer var type of fill_one_op fill_one_op->InferVarType(root_block); diff --git a/paddle/framework/backward.h b/paddle/framework/backward.h index 96154fa82cb7a486aa4762ae633982ed6735220b..2d3b75fe6966cb5dad32dc185a3973e92b23e26e 100644 --- a/paddle/framework/backward.h +++ b/paddle/framework/backward.h @@ -49,7 +49,7 @@ using ParamGradInfoMap = std::unordered_map; ParamGradInfoMap AppendBackward( - ProgramDescBind& program_desc, const VarDescBind& target, + ProgramDesc& program_desc, const VarDesc& target, const std::unordered_set& no_grad_vars); } // namespace framework diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index 1099fffab3129876b3fd3a93cbc4d8a5bd29bea1..0957646b5642cd9afce5d88b2c638679cb01f198 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -58,13 +58,13 @@ class RowWiseAddGradMaker : public SingleGradOpDescMaker { using SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - std::unique_ptr Apply() const override { - auto grad_op = new OpDescBind(); + std::unique_ptr Apply() const override { + auto grad_op = new OpDesc(); grad_op->SetInput(GradVarName("Out"), OutputGrad("Out")); grad_op->SetOutput(GradVarName("X"), InputGrad("X")); grad_op->SetOutput(GradVarName("b"), InputGrad("b")); grad_op->SetType("rowwise_add_grad"); - return std::unique_ptr(grad_op); + return std::unique_ptr(grad_op); } }; @@ -159,7 +159,7 @@ class FillZeroOpMaker : public OpProtoAndCheckerMaker { FillZeroOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "x"); - AddOutput("Y", "out"); + AddOutput("Out", "out"); AddComment(""); } }; @@ -190,11 +190,11 @@ class MinusGradOpDescMaker : public GradOpDescMakerBase { public: using GradOpDescMakerBase::GradOpDescMakerBase; - std::vector> operator()() const override { - std::vector> retv; + std::vector> operator()() const override { + std::vector> retv; auto x_g = InputGrad("X"); if (!x_g.empty()) { - auto *op_desc = new OpDescBind(); + auto *op_desc = new OpDesc(); op_desc->SetType("scale"); op_desc->SetInput("X", OutputGrad("Out")); op_desc->SetOutput("Out", x_g); @@ -204,7 +204,7 @@ class MinusGradOpDescMaker : public GradOpDescMakerBase { auto y_g = InputGrad("Y"); if (!y_g.empty()) { - auto *op_desc = new OpDescBind(); + auto *op_desc = new OpDesc(); op_desc->SetType("scale"); op_desc->SetInput("X", OutputGrad("Out")); op_desc->SetOutput("Out", y_g); @@ -430,8 +430,8 @@ TEST(Backward, op_part_of_output_are_not_need) { ASSERT_EQ("fill_zeros_like", fill_zero.Type()); ASSERT_EQ(1UL, fill_zero.Inputs("X").size()); ASSERT_EQ("Z", fill_zero.Input("X")); - ASSERT_EQ(1UL, fill_zero.Outputs("Y").size()); - ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Y")); + ASSERT_EQ(1UL, fill_zero.Outputs("Out").size()); + ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Out")); auto &d_many_out = *net->ops_[1]; ASSERT_EQ("many_output_op_grad", d_many_out.Type()); @@ -505,25 +505,25 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) { } TEST(Backward, simple_single_op) { - f::ProgramDescBind program; - f::BlockDescBind *block = program.MutableBlock(0); + f::ProgramDesc program; + f::BlockDesc *block = program.MutableBlock(0); - f::OpDescBind *op = block->AppendOp(); + f::OpDesc *op = block->AppendOp(); op->SetType("rowwise_add"); op->SetInput("X", {"x"}); op->SetInput("b", {"b"}); op->SetOutput("Out", {"out"}); - auto target = f::VarDescBind("out"); + auto target = f::VarDesc("out"); target.SetShape({1}); auto var_to_grad = AppendBackward(program, target, std::unordered_set{}); ASSERT_EQ(block->AllOps().size(), 3UL); - f::OpDescBind *fill_op = block->AllOps()[1]; + f::OpDesc *fill_op = block->AllOps()[1]; EXPECT_EQ(fill_op->Type(), "fill_constant"); - f::OpDescBind *grad_op = block->AllOps()[2]; + f::OpDesc *grad_op = block->AllOps()[2]; EXPECT_EQ(grad_op->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op->InputNames().size(), 1UL); ASSERT_EQ(grad_op->OutputNames().size(), 2UL); @@ -543,16 +543,16 @@ TEST(Backward, simple_single_op) { } TEST(Backward, default_attribute) { - f::ProgramDescBind program; - f::BlockDescBind *block = program.MutableBlock(0); - f::OpDescBind *op = block->AppendOp(); + f::ProgramDesc program; + f::BlockDesc *block = program.MutableBlock(0); + f::OpDesc *op = block->AppendOp(); op->SetType("mul"); op->SetInput("X", {"x"}); op->SetInput("Y", {"y"}); op->SetOutput("Out", {"out"}); op->CheckAttrs(); - auto target = f::VarDescBind("out"); + auto target = f::VarDesc("out"); target.SetShape({1}); AppendBackward(program, target, std::unordered_set{}); @@ -560,47 +560,47 @@ TEST(Backward, default_attribute) { EXPECT_EQ(boost::get(op->GetAttr("x_num_col_dims")), 1); EXPECT_EQ(boost::get(op->GetAttr("y_num_col_dims")), 1); - f::OpDescBind *fill_op = block->AllOps()[1]; + f::OpDesc *fill_op = block->AllOps()[1]; EXPECT_EQ(fill_op->Type(), "fill_constant"); - f::OpDescBind *grad_op = block->AllOps()[2]; + f::OpDesc *grad_op = block->AllOps()[2]; ASSERT_EQ(grad_op->Type(), "mul_grad"); EXPECT_EQ(boost::get(grad_op->GetAttr("x_num_col_dims")), 1); EXPECT_EQ(boost::get(grad_op->GetAttr("y_num_col_dims")), 1); } TEST(Backward, simple_mult_op) { - f::ProgramDescBind program; - f::BlockDescBind *block = program.MutableBlock(0); - f::OpDescBind *op1 = block->AppendOp(); + f::ProgramDesc program; + f::BlockDesc *block = program.MutableBlock(0); + f::OpDesc *op1 = block->AppendOp(); op1->SetType("rowwise_add"); op1->SetInput("X", {"x1"}); op1->SetInput("b", {"b1"}); op1->SetOutput("Out", {"out1"}); - f::OpDescBind *op2 = block->AppendOp(); + f::OpDesc *op2 = block->AppendOp(); op2->SetType("mul"); op2->SetInput("X", {"out1"}); op2->SetInput("Y", {"y2"}); op2->SetOutput("Out", {"out2"}); - f::OpDescBind *op3 = block->AppendOp(); + f::OpDesc *op3 = block->AppendOp(); op3->SetType("rowwise_add"); op3->SetInput("X", {"out2"}); op3->SetInput("b", {"b3"}); op3->SetOutput("Out", {"out3"}); - auto target = f::VarDescBind("out3"); + auto target = f::VarDesc("out3"); target.SetShape({1}); size_t forward_len = block->AllOps().size(); auto var_to_grad = AppendBackward(program, target, std::unordered_set{}); ASSERT_EQ(block->AllOps().size(), 6UL + 1); - f::OpDescBind *fill_op = block->AllOps()[forward_len]; + f::OpDesc *fill_op = block->AllOps()[forward_len]; EXPECT_EQ(fill_op->Type(), "fill_constant"); - f::OpDescBind *grad_op1 = block->AllOps()[6]; + f::OpDesc *grad_op1 = block->AllOps()[6]; EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); @@ -611,7 +611,7 @@ TEST(Backward, simple_mult_op) { EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), std::vector({f::GradVarName("b1")})); - f::OpDescBind *grad_op2 = block->AllOps()[5]; + f::OpDesc *grad_op2 = block->AllOps()[5]; EXPECT_EQ(grad_op2->Type(), "mul_grad"); ASSERT_EQ(grad_op2->InputNames().size(), 4UL); ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); @@ -625,7 +625,7 @@ TEST(Backward, simple_mult_op) { EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")), std::vector({f::GradVarName("y2")})); - f::OpDescBind *grad_op3 = block->AllOps()[4]; + f::OpDesc *grad_op3 = block->AllOps()[4]; EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op3->InputNames().size(), 1UL); ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); @@ -655,42 +655,42 @@ TEST(Backward, simple_mult_op) { } TEST(Backward, intermedia_var_no_grad) { - f::ProgramDescBind program; - f::BlockDescBind *block = program.MutableBlock(0); - f::OpDescBind *op1 = block->AppendOp(); + f::ProgramDesc program; + f::BlockDesc *block = program.MutableBlock(0); + f::OpDesc *op1 = block->AppendOp(); op1->SetType("rowwise_add"); op1->SetInput("X", {"x1"}); op1->SetInput("b", {"b1"}); op1->SetOutput("Out", {"out1"}); - f::OpDescBind *op2 = block->AppendOp(); + f::OpDesc *op2 = block->AppendOp(); op2->SetType("mul"); op2->SetInput("X", {"x2"}); op2->SetInput("Y", {"y2"}); op2->SetOutput("Out", {"out2"}); - f::OpDescBind *op3 = block->AppendOp(); + f::OpDesc *op3 = block->AppendOp(); op3->SetType("rowwise_add"); op3->SetInput("X", {"out2"}); op3->SetInput("b", {"b3"}); op3->SetOutput("Out", {"out3"}); - f::OpDescBind *op4 = block->AppendOp(); + f::OpDesc *op4 = block->AppendOp(); op4->SetType("mul"); op4->SetInput("X", {"out1"}); op4->SetInput("Y", {"out3"}); op4->SetOutput("Out", {"out4"}); - auto target = f::VarDescBind("out4"); + auto target = f::VarDesc("out4"); target.SetShape({1}); size_t forward_len = block->AllOps().size(); auto var_to_grad = AppendBackward(program, target, {"out3"}); ASSERT_EQ(block->AllOps().size(), 7UL); - f::OpDescBind *fill_op = block->AllOps()[forward_len]; + f::OpDesc *fill_op = block->AllOps()[forward_len]; EXPECT_EQ(fill_op->Type(), "fill_constant"); - f::OpDescBind *grad_op1 = block->AllOps()[6]; + f::OpDesc *grad_op1 = block->AllOps()[6]; EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); @@ -701,7 +701,7 @@ TEST(Backward, intermedia_var_no_grad) { EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), std::vector({f::GradVarName("b1")})); - f::OpDescBind *grad_op4 = block->AllOps()[5]; + f::OpDesc *grad_op4 = block->AllOps()[5]; EXPECT_EQ(grad_op4->Type(), "mul_grad"); ASSERT_EQ(grad_op4->InputNames().size(), 4UL); ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); @@ -726,32 +726,32 @@ TEST(Backward, intermedia_var_no_grad) { } TEST(Backward, var_no_grad) { - f::ProgramDescBind program; - f::BlockDescBind *block = program.MutableBlock(0); - f::OpDescBind *op1 = block->AppendOp(); + f::ProgramDesc program; + f::BlockDesc *block = program.MutableBlock(0); + f::OpDesc *op1 = block->AppendOp(); op1->SetType("mult_in_out"); op1->SetInput("X", {"x1"}); op1->SetInput("H", {"h1"}); op1->SetOutput("Y", {"y1"}); op1->SetOutput("Z", {"z1"}); - f::OpDescBind *op2 = block->AppendOp(); + f::OpDesc *op2 = block->AppendOp(); op2->SetType("mult_in_out"); op2->SetInput("X", {"y1"}); op2->SetInput("H", {"z1"}); op2->SetOutput("Y", {"y2"}); op2->SetOutput("Z", {"z2"}); - auto target = f::VarDescBind("z2"); + auto target = f::VarDesc("z2"); target.SetShape({1}); size_t forward_len = block->AllOps().size(); auto var_to_grad = AppendBackward(program, target, {"z1"}); ASSERT_EQ(block->AllOps().size(), 6UL); - f::OpDescBind *fill_op = block->AllOps()[forward_len]; + f::OpDesc *fill_op = block->AllOps()[forward_len]; EXPECT_EQ(fill_op->Type(), "fill_constant"); - f::OpDescBind *grad_op2 = block->AllOps()[3]; + f::OpDesc *grad_op2 = block->AllOps()[3]; ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad"); ASSERT_EQ(grad_op2->InputNames().size(), 6UL); ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); @@ -767,15 +767,15 @@ TEST(Backward, var_no_grad) { std::vector({f::GradVarName("y1")})); EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), std::vector()); - f::OpDescBind *fill_zero_op = block->AllOps()[4]; + f::OpDesc *fill_zero_op = block->AllOps()[4]; ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like"); ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL); ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL); EXPECT_EQ(fill_zero_op->Input("X"), std::vector({"z1"})); - EXPECT_EQ(fill_zero_op->Output("Y"), + EXPECT_EQ(fill_zero_op->Output("Out"), std::vector({std::string("z1") + f::kZeroVarSuffix})); - f::OpDescBind *grad_op1 = block->AllOps()[5]; + f::OpDesc *grad_op1 = block->AllOps()[5]; ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad"); ASSERT_EQ(grad_op1->InputNames().size(), 6UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); @@ -803,37 +803,37 @@ TEST(Backward, var_no_grad) { } TEST(Backward, shared_var) { - f::ProgramDescBind program; - f::BlockDescBind *block = program.MutableBlock(0); - f::OpDescBind *op1 = block->AppendOp(); + f::ProgramDesc program; + f::BlockDesc *block = program.MutableBlock(0); + f::OpDesc *op1 = block->AppendOp(); op1->SetType("rowwise_add"); op1->SetInput("X", {"x1"}); op1->SetInput("b", {"b1"}); op1->SetOutput("Out", {"out1"}); - f::OpDescBind *op2 = block->AppendOp(); + f::OpDesc *op2 = block->AppendOp(); op2->SetType("mul"); op2->SetInput("X", {"out1"}); op2->SetInput("Y", {"y2"}); op2->SetOutput("Out", {"out2"}); - f::OpDescBind *op3 = block->AppendOp(); + f::OpDesc *op3 = block->AppendOp(); op3->SetType("rowwise_add"); op3->SetInput("X", {"out1"}); op3->SetInput("b", {"b3"}); op3->SetOutput("Out", {"out3"}); - auto target = f::VarDescBind("out3"); + auto target = f::VarDesc("out3"); target.SetShape({1}); size_t forward_len = block->AllOps().size(); auto var_to_grad = AppendBackward(program, target, std::unordered_set{}); ASSERT_EQ(block->AllOps().size(), 8UL); - f::OpDescBind *fill_op = block->AllOps()[forward_len]; + f::OpDesc *fill_op = block->AllOps()[forward_len]; EXPECT_EQ(fill_op->Type(), "fill_constant"); - f::OpDescBind *grad_op3 = block->AllOps()[4]; + f::OpDesc *grad_op3 = block->AllOps()[4]; ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op3->InputNames().size(), 1UL); ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); @@ -844,7 +844,7 @@ TEST(Backward, shared_var) { EXPECT_EQ(grad_op3->Output(f::GradVarName("b")), std::vector({f::GradVarName("b3")})); - f::OpDescBind *grad_op4 = block->AllOps()[5]; + f::OpDesc *grad_op4 = block->AllOps()[5]; ASSERT_EQ(grad_op4->Type(), "mul_grad"); ASSERT_EQ(grad_op4->InputNames().size(), 4UL); ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); @@ -858,7 +858,7 @@ TEST(Backward, shared_var) { EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector({f::GradVarName("y2")})); - f::OpDescBind *sum_op = block->AllOps()[6]; + f::OpDesc *sum_op = block->AllOps()[6]; ASSERT_EQ(sum_op->Type(), "sum"); ASSERT_EQ(sum_op->InputNames().size(), 1UL); ASSERT_EQ(sum_op->OutputNames().size(), 1UL); @@ -868,7 +868,7 @@ TEST(Backward, shared_var) { EXPECT_EQ(sum_op->Output("Out"), std::vector({f::GradVarName("out1")})); - f::OpDescBind *grad_op1 = block->AllOps()[7]; + f::OpDesc *grad_op1 = block->AllOps()[7]; ASSERT_EQ(grad_op1->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); @@ -895,19 +895,19 @@ TEST(Backward, shared_var) { } TEST(Backward, half_backward) { - f::ProgramDescBind program; - f::BlockDescBind *block = program.MutableBlock(0); + f::ProgramDesc program; + f::BlockDesc *block = program.MutableBlock(0); auto *op1 = block->AppendOp(); op1->SetType("minus"); op1->SetInput("X", {"a"}); op1->SetInput("Y", {"b"}); op1->SetOutput("Out", {"out"}); - auto target = f::VarDescBind("out"); + auto target = f::VarDesc("out"); target.SetShape({1}); size_t forward_len = block->AllOps().size(); auto var_to_grad = AppendBackward(program, target, {"b"}); - f::OpDescBind *fill_op = block->AllOps()[forward_len]; + f::OpDesc *fill_op = block->AllOps()[forward_len]; EXPECT_EQ(fill_op->Type(), "fill_constant"); auto ops = block->AllOps(); ASSERT_EQ(3UL, ops.size()); diff --git a/paddle/framework/block_desc.cc b/paddle/framework/block_desc.cc index 6b961caebd3c0e2af732ce5ff8f1ae5ef2b042aa..2d7db382a60b23eba0924519a0cea9ad3eb90142 100644 --- a/paddle/framework/block_desc.cc +++ b/paddle/framework/block_desc.cc @@ -19,18 +19,18 @@ limitations under the License. */ namespace paddle { namespace framework { -VarDescBind *BlockDescBind::Var(const std::string &name) { +VarDesc *BlockDesc::Var(const std::string &name) { auto it = vars_.find(name); if (it != vars_.end()) { return it->second.get(); } need_update_ = true; - auto *var = new VarDescBind(name); + auto *var = new VarDesc(name); vars_[name].reset(var); return var; } -VarDescBind *BlockDescBind::FindVar(const std::string &name) const { +VarDesc *BlockDesc::FindVar(const std::string &name) const { auto it = vars_.find(name); if (it == vars_.end()) { return nullptr; @@ -38,11 +38,11 @@ VarDescBind *BlockDescBind::FindVar(const std::string &name) const { return it->second.get(); } -bool BlockDescBind::HasVar(const std::string &name) const { +bool BlockDesc::HasVar(const std::string &name) const { return vars_.find(name) != vars_.end(); } -VarDescBind *BlockDescBind::FindVarRecursive(const std::string &name) const { +VarDesc *BlockDesc::FindVarRecursive(const std::string &name) const { if (name == kEmptyVarName) return nullptr; auto it = vars_.find(name); @@ -53,53 +53,52 @@ VarDescBind *BlockDescBind::FindVarRecursive(const std::string &name) const { return it->second.get(); } -VarDescBind *BlockDescBind::FindRecursiveOrCreateVar( - const std::string &name_bytes) { - VarDescBind *res = FindVarRecursive(name_bytes); +VarDesc *BlockDesc::FindRecursiveOrCreateVar(const std::string &name_bytes) { + VarDesc *res = FindVarRecursive(name_bytes); if (res == nullptr) { res = Var(name_bytes); } return res; } -bool BlockDescBind::HasVarRecursive(const std::string &name) const { +bool BlockDesc::HasVarRecursive(const std::string &name) const { return FindVarRecursive(name) != nullptr; } -std::vector BlockDescBind::AllVars() const { - std::vector res; +std::vector BlockDesc::AllVars() const { + std::vector res; for (const auto &p : vars_) { res.push_back(p.second.get()); } return res; } -OpDescBind *BlockDescBind::AppendOp() { +OpDesc *BlockDesc::AppendOp() { need_update_ = true; - ops_.emplace_back(new OpDescBind()); + ops_.emplace_back(new OpDesc()); return ops_.back().get(); } -void BlockDescBind::AppendAllocatedOp(std::unique_ptr &&op_desc) { +void BlockDesc::AppendAllocatedOp(std::unique_ptr &&op_desc) { need_update_ = true; ops_.emplace_back(std::move(op_desc)); } -OpDescBind *BlockDescBind::PrependOp() { +OpDesc *BlockDesc::PrependOp() { need_update_ = true; - ops_.emplace_front(new OpDescBind()); + ops_.emplace_front(new OpDesc()); return ops_.front().get(); } -std::vector BlockDescBind::AllOps() const { - std::vector res; +std::vector BlockDesc::AllOps() const { + std::vector res; for (const auto &op : ops_) { res.push_back(op.get()); } return res; } -void BlockDescBind::Flush() { +void BlockDesc::Flush() { for (auto &op_desc : ops_) { op_desc->Flush(); } @@ -121,43 +120,43 @@ void BlockDescBind::Flush() { } } -BlockDescBind *BlockDescBind::ParentBlock() const { +BlockDesc *BlockDesc::ParentBlock() const { if (this->desc_->parent_idx() == kNoneBlockIndex) { return nullptr; } return prog_->MutableBlock(static_cast(this->desc_->parent_idx())); } -proto::BlockDesc *BlockDescBind::Proto() { +proto::BlockDesc *BlockDesc::Proto() { Flush(); return desc_; } -BlockDescBind::BlockDescBind(ProgramDescBind *prog, proto::BlockDesc *desc) +BlockDesc::BlockDesc(ProgramDesc *prog, proto::BlockDesc *desc) : prog_(prog), desc_(desc), need_update_(false) { for (const proto::VarDesc &var_desc : desc_->vars()) { - vars_[var_desc.name()].reset(new VarDescBind(var_desc)); + vars_[var_desc.name()].reset(new VarDesc(var_desc)); } for (const proto::OpDesc &op_desc : desc_->ops()) { - ops_.emplace_back(new OpDescBind(op_desc, prog)); + ops_.emplace_back(new OpDesc(op_desc, prog)); } } -BlockDescBind::BlockDescBind(const BlockDescBind &other, proto::BlockDesc *desc, - ProgramDescBind *prog) +BlockDesc::BlockDesc(const BlockDesc &other, proto::BlockDesc *desc, + ProgramDesc *prog) : prog_(prog), desc_(desc) { need_update_ = true; for (auto &op : other.ops_) { - ops_.emplace_back(new OpDescBind(*op)); + ops_.emplace_back(new OpDesc(*op)); } for (auto &it : other.vars_) { - auto *var = new VarDescBind(*it.second); + auto *var = new VarDesc(*it.second); vars_[it.first].reset(var); } } -void BlockDescBind::ClearPBOps() { +void BlockDesc::ClearPBOps() { auto ops = this->desc_->mutable_ops(); while (!ops->empty()) { // we do not own the OpDesc, so release the ownership. @@ -165,7 +164,7 @@ void BlockDescBind::ClearPBOps() { } } -void BlockDescBind::ClearPBVars() { +void BlockDesc::ClearPBVars() { auto vars = this->desc_->mutable_vars(); while (!vars->empty()) { // we do not own the VarDesc, so release the ownership. diff --git a/paddle/framework/block_desc.h b/paddle/framework/block_desc.h index 592fe49e075a947956c6e34f03068d4c34530a46..513fc54f24c4760b27936e2ac205b2410e9861a4 100644 --- a/paddle/framework/block_desc.h +++ b/paddle/framework/block_desc.h @@ -28,20 +28,19 @@ limitations under the License. */ namespace paddle { namespace framework { -class ProgramDescBind; +class ProgramDesc; // Each Protobuf Message, we provide a XXXBind class. In that class, we optimize // read/write speed. Only when we want the protobuf message, the local changes // will be synchronized (by `Sync` method). -class BlockDescBind { +class BlockDesc { public: - BlockDescBind(ProgramDescBind *prog, proto::BlockDesc *desc); + BlockDesc(ProgramDesc *prog, proto::BlockDesc *desc); - BlockDescBind(const BlockDescBind &other, proto::BlockDesc *desc, - ProgramDescBind *prog); + BlockDesc(const BlockDesc &other, proto::BlockDesc *desc, ProgramDesc *prog); - ~BlockDescBind() { + ~BlockDesc() { this->ClearPBVars(); this->ClearPBOps(); } @@ -50,15 +49,15 @@ class BlockDescBind { int32_t Parent() const { return desc_->parent_idx(); } - VarDescBind *Var(const std::string &name_bytes); + VarDesc *Var(const std::string &name_bytes); - VarDescBind *FindVar(const std::string &name_bytes) const; + VarDesc *FindVar(const std::string &name_bytes) const; bool HasVar(const std::string &var_name) const; - VarDescBind *FindVarRecursive(const std::string &name_bytes) const; + VarDesc *FindVarRecursive(const std::string &name_bytes) const; - VarDescBind *FindRecursiveOrCreateVar(const std::string &name_bytes); + VarDesc *FindRecursiveOrCreateVar(const std::string &name_bytes); bool HasVarRecursive(const std::string &var_name) const; @@ -70,41 +69,41 @@ class BlockDescBind { return var_names; } - std::vector AllVars() const; + std::vector AllVars() const; - BlockDescBind *ParentBlock() const; + BlockDesc *ParentBlock() const; - OpDescBind *AppendOp(); + OpDesc *AppendOp(); - void AppendAllocatedOp(std::unique_ptr &&op_desc); + void AppendAllocatedOp(std::unique_ptr &&op_desc); - OpDescBind *PrependOp(); + OpDesc *PrependOp(); - std::vector AllOps() const; + std::vector AllOps() const; size_t OpSize() const { return ops_.size(); } - OpDescBind *Op(int idx) { return ops_.at(idx).get(); } + OpDesc *Op(int idx) { return ops_.at(idx).get(); } void Flush(); proto::BlockDesc *Proto(); - ProgramDescBind *Program() { return this->prog_; } + ProgramDesc *Program() { return this->prog_; } private: void ClearPBOps(); void ClearPBVars(); private: - ProgramDescBind *prog_; // not_own + ProgramDesc *prog_; // not_own proto::BlockDesc *desc_; // not_own bool need_update_; - std::deque> ops_; - std::unordered_map> vars_; + std::deque> ops_; + std::unordered_map> vars_; - DISABLE_COPY_AND_ASSIGN(BlockDescBind); + DISABLE_COPY_AND_ASSIGN(BlockDesc); }; } // namespace framework } // namespace paddle diff --git a/paddle/framework/details/op_registry.h b/paddle/framework/details/op_registry.h index 435f0b6b78b19dc433f0b761d9ac800565684b7c..7f5151c41d6046f21f7a9707e45de85ec50219ad 100644 --- a/paddle/framework/details/op_registry.h +++ b/paddle/framework/details/op_registry.h @@ -106,10 +106,10 @@ template struct OpInfoFiller { void operator()(const char* op_type, OpInfo* info) const { info->grad_op_maker_ = []( - const OpDescBind& fwd_op, + const OpDesc& fwd_op, const std::unordered_set& no_grad_set, std::unordered_map* grad_to_var, - const std::vector& grad_block) { + const std::vector& grad_block) { T maker(fwd_op, no_grad_set, grad_to_var, grad_block); return maker(); }; @@ -119,7 +119,7 @@ struct OpInfoFiller { template struct OpInfoFiller { void operator()(const char* op_type, OpInfo* info) const { - info->infer_var_type_ = [](const OpDescBind& fwd_op, BlockDescBind* block) { + info->infer_var_type_ = [](const OpDesc& fwd_op, BlockDesc* block) { T inference; inference(fwd_op, block); }; diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index ea6b259c09012ab1e5576cedeac54e46d21e40dc..c4b76911a67fe02d74dbc1917b0832a39604a366 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -64,7 +64,7 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) { } } -void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id, +void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, bool create_local_scope) { // TODO(tonyyang-svail): // - only runs on the first device (i.e. no interdevice communication) diff --git a/paddle/framework/executor.h b/paddle/framework/executor.h index 073e04729b1166f1cabd16709d161fda0d580f1c..fb861d47126c4397792c63d411b973ffd979244d 100644 --- a/paddle/framework/executor.h +++ b/paddle/framework/executor.h @@ -40,6 +40,16 @@ class DeviceContextPool { return *pool; } + const platform::DeviceContext* Borrow(const platform::Place& place) { + auto range = device_contexts_.equal_range(place); + if (range.first == range.second) { + PADDLE_THROW( + "'Place' is not supported, Please re-compile with WITH_GPU " + "option"); + } + return range.first->second; + } + std::vector Borrow( const std::vector& places) { PADDLE_ENFORCE_GT(places.size(), 0); @@ -114,7 +124,7 @@ class Executor { * ProgramDesc * Scope */ - void Run(const ProgramDescBind&, Scope*, int, bool create_local_scope = true); + void Run(const ProgramDesc&, Scope*, int, bool create_local_scope = true); private: std::vector device_contexts_; diff --git a/paddle/framework/grad_op_desc_maker.h b/paddle/framework/grad_op_desc_maker.h index 998186e33915a11f2864eb5387d19ed1bfbab51c..cf411fa710103350713342b43946697a8dd2aa46 100644 --- a/paddle/framework/grad_op_desc_maker.h +++ b/paddle/framework/grad_op_desc_maker.h @@ -22,21 +22,27 @@ namespace paddle { namespace framework { +/* + This functor class is responsible for creating the gradient ops for the given + operator fwd_op. After it is called (through operator()), the pairs of + (gradient variable, corresponding input variable of fwd_op) will be added to + grad_to_var. If an input variable of fwd_op is contained in no_grad_set, its + gradient varialbe will be ignored or kEmptyVarName depending on the template + argument DropEmptyIG in the derived classes. + */ class GradOpDescMakerBase { public: explicit GradOpDescMakerBase( - const OpDescBind& fwd_op, - const std::unordered_set& no_grad_set, + const OpDesc& fwd_op, const std::unordered_set& no_grad_set, std::unordered_map* grad_to_var, - const std::vector& grad_block = - std::vector()) + const std::vector& grad_block = std::vector()) : fwd_op_(fwd_op), no_grad_set_(no_grad_set), grad_to_var_(grad_to_var), grad_block_(grad_block) {} virtual ~GradOpDescMakerBase() = default; - virtual std::vector> operator()() const = 0; + virtual std::vector> operator()() const = 0; protected: std::vector InputGrad(const std::string& name, @@ -58,6 +64,16 @@ class GradOpDescMakerBase { if (!drop_empty_grad) { return ret_val; } + PADDLE_ENFORCE_LE(var_names.size(), 1UL, + "BUG from operator developer:" + " for input argument with a list of variables, " + " drop_empty_grad is not allowed because it makes" + " the correspondence bewteen a variable and its gradient" + " ambiguous. Use REGISTER_OP_EX to register the op" + " or call InputGrad(?,false) in GradOpDescMaker." + " Op type %s", + fwd_op_.Type()); + std::vector dropped_ret_val; dropped_ret_val.reserve(ret_val.size()); std::copy_if(ret_val.begin(), ret_val.end(), @@ -105,26 +121,26 @@ class GradOpDescMakerBase { std::string ForwardOpType() const { return this->fwd_op_.Type(); } private: - const OpDescBind& fwd_op_; + const OpDesc& fwd_op_; const std::unordered_set& no_grad_set_; std::unordered_map* grad_to_var_; protected: - std::vector grad_block_; + std::vector grad_block_; }; class SingleGradOpDescMaker : public GradOpDescMakerBase { public: using GradOpDescMakerBase::GradOpDescMakerBase; - std::vector> operator()() const { - std::vector> retv; + std::vector> operator()() const { + std::vector> retv; retv.emplace_back(this->Apply()); return retv; } protected: - virtual std::unique_ptr Apply() const = 0; + virtual std::unique_ptr Apply() const = 0; }; template @@ -133,8 +149,8 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker { using SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - virtual std::unique_ptr Apply() const { - auto* grad = new OpDescBind(); + virtual std::unique_ptr Apply() const { + auto* grad = new OpDesc(); grad->SetType(this->GradOpType()); for (auto& input_param : this->InputNames()) { @@ -150,7 +166,7 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker { grad->SetAttrMap(this->Attrs()); - return std::unique_ptr(grad); + return std::unique_ptr(grad); } virtual std::string GradOpType() const { @@ -161,7 +177,7 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker { class EmptyGradOpMaker : public GradOpDescMakerBase { public: using GradOpDescMakerBase::GradOpDescMakerBase; - std::vector> operator()() const override { + std::vector> operator()() const override { return {}; } }; diff --git a/paddle/framework/op_desc.cc b/paddle/framework/op_desc.cc index 7af5b687273d846628e9e7e2c07107a50d47acb3..b361e64438251c1df827667fb825e7f5909fb09e 100644 --- a/paddle/framework/op_desc.cc +++ b/paddle/framework/op_desc.cc @@ -25,12 +25,11 @@ limitations under the License. */ namespace paddle { namespace framework { -class OpDescBind; -class BlockDescBind; +class OpDesc; +class BlockDesc; class CompileTimeInferShapeContext : public InferShapeContext { public: - CompileTimeInferShapeContext(const OpDescBind &op, - const BlockDescBind &block); + CompileTimeInferShapeContext(const OpDesc &op, const BlockDesc &block); bool HasInput(const std::string &name) const override; @@ -76,13 +75,12 @@ class CompileTimeInferShapeContext : public InferShapeContext { void SetDim(const std::string &name, const DDim &dim) override; - const OpDescBind &op_; - const BlockDescBind &block_; + const OpDesc &op_; + const BlockDesc &block_; }; -OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs, - const VariableNameMap &outputs, - const AttributeMap &attrs) { +OpDesc::OpDesc(const std::string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, const AttributeMap &attrs) { desc_.set_type(type); inputs_ = inputs; outputs_ = outputs; @@ -90,7 +88,7 @@ OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs, need_update_ = true; } -OpDescBind::OpDescBind(const proto::OpDesc &desc, ProgramDescBind *prog) +OpDesc::OpDesc(const proto::OpDesc &desc, ProgramDesc *prog) : desc_(desc), need_update_(false) { // restore inputs_ int input_size = desc_.inputs_size(); @@ -126,20 +124,19 @@ OpDescBind::OpDescBind(const proto::OpDesc &desc, ProgramDescBind *prog) } } -proto::OpDesc *OpDescBind::Proto() { +proto::OpDesc *OpDesc::Proto() { Flush(); return &desc_; } -const std::vector &OpDescBind::Input( - const std::string &name) const { +const std::vector &OpDesc::Input(const std::string &name) const { auto it = inputs_.find(name); PADDLE_ENFORCE(it != inputs_.end(), "Input %s cannot be found in Op %s", name, Type()); return it->second; } -std::vector OpDescBind::InputArgumentNames() const { +std::vector OpDesc::InputArgumentNames() const { std::vector retv; for (auto &ipt : this->inputs_) { retv.insert(retv.end(), ipt.second.begin(), ipt.second.end()); @@ -147,21 +144,20 @@ std::vector OpDescBind::InputArgumentNames() const { return retv; } -void OpDescBind::SetInput(const std::string ¶m_name, - const std::vector &args) { +void OpDesc::SetInput(const std::string ¶m_name, + const std::vector &args) { need_update_ = true; inputs_[param_name] = args; } -const std::vector &OpDescBind::Output( - const std::string &name) const { +const std::vector &OpDesc::Output(const std::string &name) const { auto it = outputs_.find(name); PADDLE_ENFORCE(it != outputs_.end(), "Output %s cannot be found in Op %s", name, Type()); return it->second; } -std::vector OpDescBind::OutputArgumentNames() const { +std::vector OpDesc::OutputArgumentNames() const { std::vector retv; for (auto &ipt : this->outputs_) { retv.insert(retv.end(), ipt.second.begin(), ipt.second.end()); @@ -169,19 +165,19 @@ std::vector OpDescBind::OutputArgumentNames() const { return retv; } -void OpDescBind::SetOutput(const std::string ¶m_name, - const std::vector &args) { +void OpDesc::SetOutput(const std::string ¶m_name, + const std::vector &args) { need_update_ = true; this->outputs_[param_name] = args; } -proto::AttrType OpDescBind::GetAttrType(const std::string &name) const { +proto::AttrType OpDesc::GetAttrType(const std::string &name) const { auto it = attrs_.find(name); PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name); return static_cast(it->second.which() - 1); } -std::vector OpDescBind::AttrNames() const { +std::vector OpDesc::AttrNames() const { std::vector retv; retv.reserve(attrs_.size()); for (auto &attr : attrs_) { @@ -190,41 +186,39 @@ std::vector OpDescBind::AttrNames() const { return retv; } -void OpDescBind::SetAttr(const std::string &name, const Attribute &v) { +void OpDesc::SetAttr(const std::string &name, const Attribute &v) { this->attrs_[name] = v; need_update_ = true; } -void OpDescBind::SetBlockAttr(const std::string &name, BlockDescBind &block) { +void OpDesc::SetBlockAttr(const std::string &name, BlockDesc &block) { this->attrs_[name] = █ need_update_ = true; } -void OpDescBind::SetAttrMap( +void OpDesc::SetAttrMap( const std::unordered_map &attr_map) { attrs_ = attr_map; need_update_ = true; } -Attribute OpDescBind::GetAttr(const std::string &name) const { +Attribute OpDesc::GetAttr(const std::string &name) const { auto it = attrs_.find(name); PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name); return it->second; } -int OpDescBind::GetBlockAttr(const std::string &name) const { +int OpDesc::GetBlockAttr(const std::string &name) const { auto it = attrs_.find(name); PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name); - return boost::get(it->second)->ID(); + return boost::get(it->second)->ID(); } -const std::unordered_map &OpDescBind::GetAttrMap() - const { +const std::unordered_map &OpDesc::GetAttrMap() const { return attrs_; } -void OpDescBind::Rename(const std::string &old_name, - const std::string &new_name) { +void OpDesc::Rename(const std::string &old_name, const std::string &new_name) { for (auto &input : inputs_) { std::replace(input.second.begin(), input.second.end(), old_name, new_name); } @@ -235,8 +229,8 @@ void OpDescBind::Rename(const std::string &old_name, need_update_ = true; } -void OpDescBind::RenameOutput(const std::string &old_name, - const std::string &new_name) { +void OpDesc::RenameOutput(const std::string &old_name, + const std::string &new_name) { for (auto &output : outputs_) { std::replace(output.second.begin(), output.second.end(), old_name, new_name); @@ -244,8 +238,8 @@ void OpDescBind::RenameOutput(const std::string &old_name, need_update_ = true; } -void OpDescBind::RenameInput(const std::string &old_name, - const std::string &new_name) { +void OpDesc::RenameInput(const std::string &old_name, + const std::string &new_name) { for (auto &input : inputs_) { std::replace(input.second.begin(), input.second.end(), old_name, new_name); } @@ -278,7 +272,7 @@ struct SetAttrDescVisitor : public boost::static_visitor { void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); } }; -void OpDescBind::Flush() { +void OpDesc::Flush() { if (need_update_) { this->desc_.mutable_inputs()->Clear(); for (auto &ipt : inputs_) { @@ -330,7 +324,7 @@ static void InitInferShapeFuncs() { }); } -void OpDescBind::CheckAttrs() { +void OpDesc::CheckAttrs() { PADDLE_ENFORCE(!Type().empty(), "CheckAttr() can not be called before type is setted."); auto *checker = OpInfoMap::Instance().Get(Type()).Checker(); @@ -342,7 +336,7 @@ void OpDescBind::CheckAttrs() { checker->Check(attrs_); } -void OpDescBind::InferShape(const BlockDescBind &block) const { +void OpDesc::InferShape(const BlockDesc &block) const { VLOG(3) << "CompileTime infer shape on " << Type(); InitInferShapeFuncs(); auto &infer_shape = OpInfoMap::Instance().Get(this->Type()).infer_shape_; @@ -365,7 +359,7 @@ void OpDescBind::InferShape(const BlockDescBind &block) const { infer_shape(&ctx); } -void OpDescBind::InferVarType(BlockDescBind *block) const { +void OpDesc::InferVarType(BlockDesc *block) const { auto &info = OpInfoMap::Instance().Get(this->Type()); if (info.infer_var_type_) { info.infer_var_type_(*this, block); @@ -384,7 +378,7 @@ void OpDescBind::InferVarType(BlockDescBind *block) const { } CompileTimeInferShapeContext::CompileTimeInferShapeContext( - const OpDescBind &op, const BlockDescBind &block) + const OpDesc &op, const BlockDesc &block) : op_(op), block_(block) {} bool CompileTimeInferShapeContext::HasInput(const std::string &name) const { diff --git a/paddle/framework/op_desc.h b/paddle/framework/op_desc.h index 0f0f126f9859e729aaf83f8bd13c60f551f0c1d5..93d4a88f3c390551ab41e42ec2f6f30f52e306db 100644 --- a/paddle/framework/op_desc.h +++ b/paddle/framework/op_desc.h @@ -23,17 +23,17 @@ limitations under the License. */ namespace paddle { namespace framework { -class BlockDescBind; -class ProgramDescBind; +class BlockDesc; +class ProgramDesc; -class OpDescBind { +class OpDesc { public: - OpDescBind() {} + OpDesc() {} - OpDescBind(const std::string &type, const VariableNameMap &inputs, - const VariableNameMap &outputs, const AttributeMap &attrs); + OpDesc(const std::string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, const AttributeMap &attrs); - OpDescBind(const proto::OpDesc &desc, ProgramDescBind *prog); + OpDesc(const proto::OpDesc &desc, ProgramDesc *prog); proto::OpDesc *Proto(); @@ -65,7 +65,7 @@ class OpDescBind { void SetAttr(const std::string &name, const Attribute &v); - void SetBlockAttr(const std::string &name, BlockDescBind &block); + void SetBlockAttr(const std::string &name, BlockDesc &block); Attribute GetAttr(const std::string &name) const; @@ -107,9 +107,9 @@ class OpDescBind { void CheckAttrs(); - void InferShape(const BlockDescBind &block) const; + void InferShape(const BlockDesc &block) const; - void InferVarType(BlockDescBind *block) const; + void InferVarType(BlockDesc *block) const; void MarkAsTarget() { desc_.set_is_target(true); } @@ -127,7 +127,9 @@ class OpDescBind { } proto::OpDesc desc_; + // input arg name => output variable names VariableNameMap inputs_; + // output arg name => output variable names VariableNameMap outputs_; AttributeMap attrs_; diff --git a/paddle/framework/op_registry.cc b/paddle/framework/op_registry.cc index f202c0b27a7d4d7e7d8fe7b578d2d98bfa978f9b..dfa151316daeccfe92e26818165a694b78b5df62 100644 --- a/paddle/framework/op_registry.cc +++ b/paddle/framework/op_registry.cc @@ -47,7 +47,7 @@ static VariableNameMap ConvertOpDescVarsToVarNameMap( std::unique_ptr OpRegistry::CreateOp( const proto::OpDesc& op_desc) { VLOG(1) << "CreateOp directly from OpDesc is deprecated. It should only be" - "used in unit tests. Use CreateOp(const OpDescBind& op_desc) " + "used in unit tests. Use CreateOp(const OpDesc& op_desc) " "instead."; VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs()); VariableNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs()); @@ -59,7 +59,7 @@ std::unique_ptr OpRegistry::CreateOp( return CreateOp(op_desc.type(), inputs, outputs, attrs); } -std::unique_ptr OpRegistry::CreateOp(const OpDescBind& op_desc) { +std::unique_ptr OpRegistry::CreateOp(const OpDesc& op_desc) { return CreateOp(op_desc.Type(), op_desc.Inputs(), op_desc.Outputs(), op_desc.GetAttrMap()); } diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 7367e0e637a6d308043f35de628913d060fb379c..7f0155b61f44b676825b84667d5bebb798cae8a3 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -79,7 +79,7 @@ class OpRegistry { static std::unique_ptr CreateOp(const proto::OpDesc& op_desc); - static std::unique_ptr CreateOp(const OpDescBind& op_desc); + static std::unique_ptr CreateOp(const OpDesc& op_desc); }; template @@ -126,6 +126,14 @@ class OpKernelRegistrar : public Registrar { __test_global_namespace_##uniq_name##__>::value, \ msg) +/* + The variadic arguments should be class types derived from one of the + following classes: + OpProtoAndCheckerMaker + GradOpDescMakerBase + VarTypeInference + InferShapeBase +*/ #define REGISTER_OPERATOR(op_type, op_class, ...) \ STATIC_ASSERT_GLOBAL_NAMESPACE( \ __reg_op__##op_type, \ @@ -144,20 +152,29 @@ class OpKernelRegistrar : public Registrar { } /** - * Macro to register Operator. + * Macro to register Operator. When the input is duplicable, you should + * use REGISTER_OP_EX with deop_empty_grad=false instead. */ -#define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \ - grad_op_class) \ - REGISTER_OPERATOR(grad_op_type, grad_op_class); \ - class _GradOpDescMaker_##grad_op_type##_ \ - : public ::paddle::framework::DefaultGradOpDescMaker { \ - using ::paddle::framework::DefaultGradOpDescMaker< \ - true>::DefaultGradOpDescMaker; \ - \ - protected: \ - virtual std::string GradOpType() const { return #grad_op_type; } \ - }; \ - REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_, \ +#define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \ + grad_op_class) \ + REGISTER_OP_EX(op_type, op_class, op_maker_class, grad_op_type, \ + grad_op_class, true) + +// When an argument is duplicable, we need to use this version. +// Perhaps we can omit DropEmptyIG template parameter and +// only have one version of REGISTER_OP. +#define REGISTER_OP_EX(op_type, op_class, op_maker_class, grad_op_type, \ + grad_op_class, drop_empty_grad) \ + REGISTER_OPERATOR(grad_op_type, grad_op_class); \ + class _GradOpDescMaker_##grad_op_type##_ \ + : public ::paddle::framework::DefaultGradOpDescMaker { \ + using ::paddle::framework::DefaultGradOpDescMaker< \ + drop_empty_grad>::DefaultGradOpDescMaker; \ + \ + protected: \ + virtual std::string GradOpType() const { return #grad_op_type; } \ + }; \ + REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_, \ op_maker_class); #define REGISTER_OP_WITH_KERNEL(op_type, ...) \ diff --git a/paddle/framework/program_desc.cc b/paddle/framework/program_desc.cc index 30a265ccac1d4287d5cb0d83386860425a1af4c0..b5d9e5e385c1ba57169ef885824fc23b0f130692 100644 --- a/paddle/framework/program_desc.cc +++ b/paddle/framework/program_desc.cc @@ -18,49 +18,49 @@ limitations under the License. */ namespace paddle { namespace framework { -BlockDescBind *ProgramDescBind::AppendBlock(const BlockDescBind &parent) { +BlockDesc *ProgramDesc::AppendBlock(const BlockDesc &parent) { auto *b = desc_.add_blocks(); b->set_parent_idx(parent.ID()); b->set_idx(desc_.blocks_size() - 1); - blocks_.emplace_back(new BlockDescBind(this, b)); + blocks_.emplace_back(new BlockDesc(this, b)); return blocks_.back().get(); } -proto::ProgramDesc *ProgramDescBind::Proto() { +proto::ProgramDesc *ProgramDesc::Proto() { for (auto &block : blocks_) { block->Flush(); } return &desc_; } -ProgramDescBind::ProgramDescBind() { +ProgramDesc::ProgramDesc() { auto *block = desc_.mutable_blocks()->Add(); block->set_idx(kRootBlockIndex); block->set_parent_idx(kNoneBlockIndex); - blocks_.emplace_back(new BlockDescBind(this, block)); + blocks_.emplace_back(new BlockDesc(this, block)); } -ProgramDescBind::ProgramDescBind(const ProgramDescBind &o) { +ProgramDesc::ProgramDesc(const ProgramDesc &o) { desc_ = o.desc_; for (int i = 0; i < desc_.blocks_size(); ++i) { auto *block = desc_.mutable_blocks(i); - blocks_.emplace_back(new BlockDescBind(*o.blocks_[i], block, this)); + blocks_.emplace_back(new BlockDesc(*o.blocks_[i], block, this)); } } -ProgramDescBind::ProgramDescBind(const proto::ProgramDesc &desc) { +ProgramDesc::ProgramDesc(const proto::ProgramDesc &desc) { desc_ = desc; for (auto &block_desc : *desc_.mutable_blocks()) { - blocks_.emplace_back(new BlockDescBind(this, &block_desc)); + blocks_.emplace_back(new BlockDesc(this, &block_desc)); } } -ProgramDescBind::ProgramDescBind(const std::string &binary_str) { +ProgramDesc::ProgramDesc(const std::string &binary_str) { PADDLE_ENFORCE(desc_.ParseFromString(binary_str), "Fail to parse program_desc from binary string."); for (auto &block_desc : *desc_.mutable_blocks()) { - blocks_.emplace_back(new BlockDescBind(this, &block_desc)); + blocks_.emplace_back(new BlockDesc(this, &block_desc)); } } diff --git a/paddle/framework/program_desc.h b/paddle/framework/program_desc.h index affec491ca598a66129f224a35cbf6019859738c..15a962bb696d6172acd1a83cf9bb1ffd0846d449 100644 --- a/paddle/framework/program_desc.h +++ b/paddle/framework/program_desc.h @@ -23,23 +23,23 @@ limitations under the License. */ namespace paddle { namespace framework { -class BlockDescBind; +class BlockDesc; -class ProgramDescBind { +class ProgramDesc { public: - ProgramDescBind(); + ProgramDesc(); - explicit ProgramDescBind(const proto::ProgramDesc &desc); + explicit ProgramDesc(const proto::ProgramDesc &desc); - ProgramDescBind(const ProgramDescBind &o); + ProgramDesc(const ProgramDesc &o); - explicit ProgramDescBind(const std::string &binary_str); + explicit ProgramDesc(const std::string &binary_str); - BlockDescBind *AppendBlock(const BlockDescBind &parent); + BlockDesc *AppendBlock(const BlockDesc &parent); - BlockDescBind *MutableBlock(size_t idx) { return blocks_[idx].get(); } + BlockDesc *MutableBlock(size_t idx) { return blocks_[idx].get(); } - const BlockDescBind &Block(size_t idx) const { return *blocks_[idx]; } + const BlockDesc &Block(size_t idx) const { return *blocks_[idx]; } size_t Size() const { return blocks_.size(); } @@ -48,7 +48,7 @@ class ProgramDescBind { private: proto::ProgramDesc desc_; - std::vector> blocks_; + std::vector> blocks_; }; } // namespace framework } // namespace paddle diff --git a/paddle/framework/program_desc_test.cc b/paddle/framework/program_desc_test.cc index c4fb28f2cc9bd35439690b1a97692bd13671d3e1..a49886f7ea56bc57459202dba65e3f76a902cd70 100644 --- a/paddle/framework/program_desc_test.cc +++ b/paddle/framework/program_desc_test.cc @@ -19,7 +19,7 @@ namespace paddle { namespace framework { TEST(ProgramDesc, copy_ctor) { - ProgramDescBind program; + ProgramDesc program; auto* global_block = program.MutableBlock(0); auto* x = global_block->Var("X"); x->SetType(proto::VarDesc_VarType_LOD_TENSOR); @@ -42,12 +42,12 @@ TEST(ProgramDesc, copy_ctor) { out->SetType(proto::VarDesc_VarType_LOD_TENSOR); op->SetOutput("Y", {out->Name()}); - ProgramDescBind program_copy(program); + ProgramDesc program_copy(program); auto* global_block_copy = program_copy.MutableBlock(0); ASSERT_NE(global_block, global_block_copy); - auto assert_same_var = [&](const std::string& name, VarDescBind* var_before) { + auto assert_same_var = [&](const std::string& name, VarDesc* var_before) { ASSERT_TRUE(global_block_copy->HasVar(name)); auto* copy = global_block_copy->Var(name); ASSERT_NE(copy, var_before); @@ -81,7 +81,7 @@ TEST(ProgramDesc, copy_ctor) { } TEST(ProgramDescBind, serialize_and_deserialize) { - ProgramDescBind program_origin; + ProgramDesc program_origin; auto* global_block = program_origin.MutableBlock(0); auto* x = global_block->Var("X"); x->SetType(proto::VarDesc_VarType_LOD_TENSOR); @@ -107,11 +107,11 @@ TEST(ProgramDescBind, serialize_and_deserialize) { std::string binary_str; program_origin.Proto()->SerializeToString(&binary_str); - ProgramDescBind program_restored(binary_str); + ProgramDesc program_restored(binary_str); auto* global_block_restored = program_restored.MutableBlock(0); ASSERT_NE(global_block, global_block_restored); - auto assert_same_var = [&](const std::string& name, VarDescBind* var_before) { + auto assert_same_var = [&](const std::string& name, VarDesc* var_before) { ASSERT_TRUE(global_block_restored->HasVar(name)); auto* restored = global_block_restored->Var(name); ASSERT_NE(restored, var_before); diff --git a/paddle/framework/prune_test.cc b/paddle/framework/prune_test.cc index 47fe4b0636c14c5a4f0d4e000a4da8f8951c5d62..bdd57659432ea4f9bdd05425a802110b0c202fb8 100644 --- a/paddle/framework/prune_test.cc +++ b/paddle/framework/prune_test.cc @@ -29,7 +29,7 @@ namespace ops = paddle::operators; void AddOp(const std::string &type, const f::VariableNameMap &inputs, const f::VariableNameMap &outputs, f::AttributeMap attrs, - paddle::framework::BlockDescBind *block) { + paddle::framework::BlockDesc *block) { // insert output for (auto kv : outputs) { for (auto v : kv.second) { @@ -51,8 +51,8 @@ void AddOp(const std::string &type, const f::VariableNameMap &inputs, } TEST(Prune, one_operator) { - f::ProgramDescBind program; - f::BlockDescBind *block = program.MutableBlock(0); + f::ProgramDesc program; + f::BlockDesc *block = program.MutableBlock(0); AddOp("one_one", {{"input", {"a"}}}, {{"output", {"b"}}}, f::AttributeMap{}, block); @@ -69,8 +69,8 @@ TEST(Prune, one_operator) { } TEST(Prune, forward) { - f::ProgramDescBind program; - f::BlockDescBind *block = program.MutableBlock(0); + f::ProgramDesc program; + f::BlockDesc *block = program.MutableBlock(0); AddOp("one_one", {{"input", {"a"}}}, {{"output", {"b"}}}, f::AttributeMap{}, block); @@ -92,8 +92,8 @@ TEST(Prune, forward) { } TEST(Prune, multi_input_op) { - f::ProgramDescBind program; - f::BlockDescBind *block = program.MutableBlock(0); + f::ProgramDesc program; + f::BlockDesc *block = program.MutableBlock(0); AddOp("one_one", {{"input", {"a0"}}}, {{"output", {"b0"}}}, f::AttributeMap{}, block); @@ -113,8 +113,8 @@ TEST(Prune, multi_input_op) { } TEST(Prune, multi_output_op) { - f::ProgramDescBind program; - f::BlockDescBind *block = program.MutableBlock(0); + f::ProgramDesc program; + f::BlockDesc *block = program.MutableBlock(0); AddOp("one_two", {{"input", {"a"}}}, {{"output", {"b", "c"}}}, f::AttributeMap{}, block); @@ -132,8 +132,8 @@ TEST(Prune, multi_output_op) { } TEST(Prune, multi_target) { - f::ProgramDescBind program; - f::BlockDescBind *block = program.MutableBlock(0); + f::ProgramDesc program; + f::BlockDesc *block = program.MutableBlock(0); AddOp("one_two", {{"input", {"a"}}}, {{"output", {"b", "c"}}}, f::AttributeMap{}, block); diff --git a/paddle/framework/type_defs.h b/paddle/framework/type_defs.h index baeb98c9bd49ec65da5931bcbe33ab788f86f3e8..da152e8b9d23490e2e69c2dd215c45355e1c1e44 100644 --- a/paddle/framework/type_defs.h +++ b/paddle/framework/type_defs.h @@ -25,11 +25,9 @@ namespace paddle { namespace framework { class OperatorBase; -class OpDescBind; -class BlockDescBind; -class BlockDesc; +class OpDesc; class InferShapeContext; -class BlockDescBind; +class BlockDesc; using VariableNameMap = std::map>; @@ -37,7 +35,7 @@ using VariableNameMap = std::map>; using Attribute = boost::variant, std::vector, std::vector, bool, - std::vector, BlockDescBind*>; + std::vector, BlockDesc*>; using AttributeMap = std::unordered_map; @@ -45,13 +43,13 @@ using OpCreator = std::function; -using GradOpMakerFN = std::function>( - const OpDescBind&, const std::unordered_set& /*no_grad_set*/, +using GradOpMakerFN = std::function>( + const OpDesc&, const std::unordered_set& /*no_grad_set*/, std::unordered_map* /*grad_to_var*/, - const std::vector& grad_block)>; + const std::vector& grad_block)>; -using InferVarTypeFN = std::function; +using InferVarTypeFN = + std::function; using InferShapeFN = std::function; diff --git a/paddle/framework/var_desc.cc b/paddle/framework/var_desc.cc index 2180827767e73b7ce51e05e402de8b0dd68571bd..bd8973eeb369aabd2c52d4fccf799657c564ee78 100644 --- a/paddle/framework/var_desc.cc +++ b/paddle/framework/var_desc.cc @@ -18,29 +18,27 @@ limitations under the License. */ namespace paddle { namespace framework { -proto::VarDesc::VarType VarDescBind::GetType() const { return desc_.type(); } +proto::VarDesc::VarType VarDesc::GetType() const { return desc_.type(); } -void VarDescBind::SetType(proto::VarDesc::VarType type) { - desc_.set_type(type); -} +void VarDesc::SetType(proto::VarDesc::VarType type) { desc_.set_type(type); } -void VarDescBind::SetShape(const std::vector &dims) { +void VarDesc::SetShape(const std::vector &dims) { VectorToRepeated(dims, mutable_tensor_desc()->mutable_dims()); } -void VarDescBind::SetDataType(proto::DataType data_type) { +void VarDesc::SetDataType(proto::DataType data_type) { mutable_tensor_desc()->set_data_type(data_type); } -std::vector VarDescBind::Shape() const { +std::vector VarDesc::Shape() const { return RepeatedToVector(tensor_desc().dims()); } -proto::DataType VarDescBind::GetDataType() const { +proto::DataType VarDesc::GetDataType() const { return tensor_desc().data_type(); } -void VarDescBind::SetLoDLevel(int32_t lod_level) { +void VarDesc::SetLoDLevel(int32_t lod_level) { switch (desc_.type()) { case proto::VarDesc::LOD_TENSOR: desc_.mutable_lod_tensor()->set_lod_level(lod_level); @@ -54,7 +52,7 @@ void VarDescBind::SetLoDLevel(int32_t lod_level) { } } -int32_t VarDescBind::GetLodLevel() const { +int32_t VarDesc::GetLodLevel() const { switch (desc_.type()) { case proto::VarDesc::LOD_TENSOR: return desc_.lod_tensor().lod_level(); @@ -66,7 +64,7 @@ int32_t VarDescBind::GetLodLevel() const { } } -const proto::TensorDesc &VarDescBind::tensor_desc() const { +const proto::TensorDesc &VarDesc::tensor_desc() const { PADDLE_ENFORCE(desc_.has_type(), "invoke TensorDesc must after set type"); switch (desc_.type()) { case proto::VarDesc::SELECTED_ROWS: @@ -80,7 +78,7 @@ const proto::TensorDesc &VarDescBind::tensor_desc() const { } } -proto::TensorDesc *VarDescBind::mutable_tensor_desc() { +proto::TensorDesc *VarDesc::mutable_tensor_desc() { PADDLE_ENFORCE(desc_.has_type(), "invoke MutableTensorDesc must after set type"); switch (desc_.type()) { diff --git a/paddle/framework/var_desc.h b/paddle/framework/var_desc.h index 335a864cabfe575e153cd5c44b5cd30c312914a2..4fd2abe7fb215c3ac454de3e30754685111eb570 100644 --- a/paddle/framework/var_desc.h +++ b/paddle/framework/var_desc.h @@ -53,14 +53,14 @@ inline void VectorToRepeated(const std::vector &vec, } } -class VarDescBind { +class VarDesc { public: - explicit VarDescBind(const std::string &name) { + explicit VarDesc(const std::string &name) { desc_.set_name(name); desc_.set_type(proto::VarDesc::LOD_TENSOR); } - explicit VarDescBind(const proto::VarDesc &desc) : desc_(desc) {} + explicit VarDesc(const proto::VarDesc &desc) : desc_(desc) {} proto::VarDesc *Proto() { return &desc_; } diff --git a/paddle/framework/var_type_inference.h b/paddle/framework/var_type_inference.h index 32abbeb33479444c5e7a9889f4211f59af07f98f..1a4dca05f741f33d58eeccda9d1f800aadb8c01f 100644 --- a/paddle/framework/var_type_inference.h +++ b/paddle/framework/var_type_inference.h @@ -21,8 +21,7 @@ namespace framework { class VarTypeInference { public: virtual ~VarTypeInference() {} - virtual void operator()(const OpDescBind& op_desc, - BlockDescBind* block) const = 0; + virtual void operator()(const OpDesc& op_desc, BlockDesc* block) const = 0; }; } // namespace framework diff --git a/paddle/framework/var_type_inference_test.cc b/paddle/framework/var_type_inference_test.cc index 8b465cbc59c50fbbe48036eb7dc09847d21d9d0b..92f333c558413ac3253c0fb8a20d6f0cfa33f99c 100644 --- a/paddle/framework/var_type_inference_test.cc +++ b/paddle/framework/var_type_inference_test.cc @@ -33,8 +33,7 @@ class SumOpMaker : public OpProtoAndCheckerMaker { class SumOpVarTypeInference : public VarTypeInference { public: - void operator()(const OpDescBind &op_desc, - BlockDescBind *block) const override { + void operator()(const OpDesc &op_desc, BlockDesc *block) const override { auto &inputs = op_desc.Input("X"); auto default_var_type = proto::VarDesc::SELECTED_ROWS; @@ -62,7 +61,7 @@ namespace paddle { namespace framework { TEST(InferVarType, sum_op) { - ProgramDescBind prog; + ProgramDesc prog; auto *op = prog.MutableBlock(0)->AppendOp(); op->SetType("sum"); op->SetInput("X", {"test_a", "test_b", "test_c"}); @@ -85,7 +84,7 @@ TEST(InferVarType, sum_op) { } TEST(InferVarType, sum_op_without_infer_var_type) { - ProgramDescBind prog; + ProgramDesc prog; auto *op = prog.MutableBlock(0)->AppendOp(); op->SetType("sum_without_infer_var_type"); op->SetInput("X", {"test2_a", "test2_b", "test2_c"}); diff --git a/paddle/memory/memcpy.cc b/paddle/memory/memcpy.cc index 1df88a6da9fb0c50d0d7ecd083c0533d8a886a67..5c629dc3d2aca2705e439df836214c1284b31c8f 100644 --- a/paddle/memory/memcpy.cc +++ b/paddle/memory/memcpy.cc @@ -62,33 +62,6 @@ void Copy(platform::GPUPlace dst_place, } } -template <> -void Copy(platform::CPUPlace dst_place, - void* dst, - platform::GPUPlace src_place, - const void* src, size_t num) { - platform::SetDeviceId(src_place.device); - platform::GpuMemcpySync(dst, src, num, cudaMemcpyDeviceToHost); -} - -template <> -void Copy(platform::GPUPlace dst_place, - void* dst, - platform::CPUPlace src_place, - const void* src, size_t num) { - platform::SetDeviceId(dst_place.device); - platform::GpuMemcpySync(dst, src, num, cudaMemcpyHostToDevice); -} - -template <> -void Copy(platform::GPUPlace dst_place, - void* dst, - platform::GPUPlace src_place, - const void* src, size_t num) { - platform::SetDeviceId(dst_place.device); - platform::GpuMemcpySync(dst, src, num, cudaMemcpyDeviceToDevice); -} - #endif } // namespace memory diff --git a/paddle/operators/array_to_lod_tensor_op.cc b/paddle/operators/array_to_lod_tensor_op.cc index aafdb8fb248397d78360745270d390ff4d60e7eb..b6ca3cad94425207629160a4c7d715f685b23a09 100644 --- a/paddle/operators/array_to_lod_tensor_op.cc +++ b/paddle/operators/array_to_lod_tensor_op.cc @@ -149,14 +149,14 @@ class ArrayToLoDTensorGradMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - std::unique_ptr Apply() const override { - auto *grad_op = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto *grad_op = new framework::OpDesc(); grad_op->SetType("lod_tensor_to_array"); grad_op->SetInput("X", OutputGrad("Out")); grad_op->SetInput("RankTable", Input("RankTable")); grad_op->SetOutput("Out", InputGrad("X")); grad_op->SetAttrMap(Attrs()); - return std::unique_ptr(grad_op); + return std::unique_ptr(grad_op); } }; diff --git a/paddle/operators/assign_op.cc b/paddle/operators/assign_op.cc index 0d98755aa07e4b655e74211ffecb039254e50608..a914ff4ba92318c75326bd7945bb73bcb93b6fc3 100644 --- a/paddle/operators/assign_op.cc +++ b/paddle/operators/assign_op.cc @@ -121,12 +121,12 @@ class AssignGradMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - std::unique_ptr Apply() const override { - auto *op = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto *op = new framework::OpDesc(); op->SetType("assign"); op->SetInput("X", OutputGrad("Out")); op->SetOutput("Out", InputGrad("X")); - return std::unique_ptr(op); + return std::unique_ptr(op); } }; diff --git a/paddle/operators/beam_search_decode_op.cc b/paddle/operators/beam_search_decode_op.cc index ceb20cbe18445cc6fbaea6ba1c64f143e88fb9c9..32756faac5324cfb3b5366857d2c8176665fb3ec 100644 --- a/paddle/operators/beam_search_decode_op.cc +++ b/paddle/operators/beam_search_decode_op.cc @@ -119,8 +119,8 @@ class BeamSearchDecodeInferShape : public framework::InferShapeBase { class BeamSearchDecodeInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDescBind& op_desc, - framework::BlockDescBind* block) const override { + void operator()(const framework::OpDesc& op_desc, + framework::BlockDesc* block) const override { for (auto& o : op_desc.Output("SentenceIds")) { block->Var(o)->SetType(framework::proto::VarDesc::LOD_TENSOR); } diff --git a/paddle/operators/cast_op.cc b/paddle/operators/cast_op.cc index 927a32645ccb6cd8aa225f3ce1ba78045bd8fd19..fc6da064904610f5c9c140a6328858d697dd954e 100644 --- a/paddle/operators/cast_op.cc +++ b/paddle/operators/cast_op.cc @@ -52,14 +52,14 @@ class CastOpGradMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - std::unique_ptr Apply() const override { - auto grad = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto grad = new framework::OpDesc(); grad->SetType("cast"); grad->SetInput("X", OutputGrad("Out")); grad->SetOutput("Out", InputGrad("X")); grad->SetAttr("out_dtype", GetAttr("in_dtype")); grad->SetAttr("in_dtype", GetAttr("out_dtype")); - return std::unique_ptr(grad); + return std::unique_ptr(grad); } }; diff --git a/paddle/operators/concat_op.cc b/paddle/operators/concat_op.cc index 6151e2e73fb33f01794f81bd176fde7e5579a5c8..32b61edfd0dd163e5ef8f3d1de133c55314458b5 100644 --- a/paddle/operators/concat_op.cc +++ b/paddle/operators/concat_op.cc @@ -98,8 +98,8 @@ class ConcatOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(concat, ops::ConcatOp, ops::ConcatOpMaker, concat_grad, - ops::ConcatOpGrad) +REGISTER_OP_EX(concat, ops::ConcatOp, ops::ConcatOpMaker, concat_grad, + ops::ConcatOpGrad, false) REGISTER_OP_CPU_KERNEL(concat, ops::ConcatKernel) REGISTER_OP_CPU_KERNEL(concat_grad, diff --git a/paddle/operators/conditional_block_op.cc b/paddle/operators/conditional_block_op.cc index 5fe362c1b630867d68b566cb9660b57860368d70..204be7d1e5385b7fdab54914bec216543e360cd3 100644 --- a/paddle/operators/conditional_block_op.cc +++ b/paddle/operators/conditional_block_op.cc @@ -65,7 +65,7 @@ class ConditionalBlockOp : public ConditionalOp { scopes->front() = &scope.NewScope(); auto &cur_scope = *scopes->front(); - auto *block = Attr("sub_block"); + auto *block = Attr("sub_block"); framework::Executor exec(dev_ctx); exec.Run(*block->Program(), &cur_scope, block->ID(), false); } @@ -86,7 +86,7 @@ class ConditionalBlockOpProtoMaker : public framework::OpProtoAndCheckerMaker { "(std::vector) The step scope of conditional block. To " "unify the conditional block, rnn and while op, the type of " "scope is std::vector"); - AddAttr( + AddAttr( "sub_block", "The step block of conditional block operator"); AddComment(R"DOC(Conditional block operator @@ -116,7 +116,7 @@ class ConditionalBlockGradOp : public ConditionalOp { auto &scopes = scope_var->Get>(); framework::Scope &cur_scope = *scopes[0]; - auto *block = Attr("sub_block"); + auto *block = Attr("sub_block"); framework::Executor exec(dev_ctx); exec.Run(*block->Program(), &cur_scope, block->ID(), false); @@ -170,18 +170,19 @@ class ConditionalBlockGradMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - std::unique_ptr Apply() const override { - auto grad_op = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto grad_op = new framework::OpDesc(); grad_op->SetType("conditional_block_grad"); grad_op->SetInput("X", Input("X")); grad_op->SetInput("Params", Input("Params")); grad_op->SetInput("Out", Output("Out")); grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); grad_op->SetInput("Scope", Output("Scope")); - grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X")); - grad_op->SetOutput(framework::GradVarName("Params"), InputGrad("Params")); + grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X", false)); + grad_op->SetOutput(framework::GradVarName("Params"), + InputGrad("Params", false)); grad_op->SetBlockAttr("sub_block", *this->grad_block_[0]); - return std::unique_ptr(grad_op); + return std::unique_ptr(grad_op); } }; diff --git a/paddle/operators/conv_transpose_cudnn_op.cc b/paddle/operators/conv_transpose_cudnn_op.cc index 2348bed4ff1c658e2b5614a3bdd94dfb554ad705..8980ff91f5d7c585d9ce0ce62cfb90f47ea86ec6 100644 --- a/paddle/operators/conv_transpose_cudnn_op.cc +++ b/paddle/operators/conv_transpose_cudnn_op.cc @@ -21,8 +21,6 @@ class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker { public: CudnnConv2DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker) : Conv2DTransposeOpMaker(proto, op_checker) { - AddAttr>("dilations", "dilations of convolution operator.") - .SetDefault({1, 1}); AddAttr("workspace_size_MB", "workspace size for cudnn, in MB, " "workspace is a section of GPU memory which will be " @@ -37,8 +35,6 @@ class CudnnConv3DTransposeOpMaker : public Conv3DTransposeOpMaker { public: CudnnConv3DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker) : Conv3DTransposeOpMaker(proto, op_checker) { - AddAttr>("dilations", "dilations of convolution operator.") - .SetDefault({1, 1, 1}); AddAttr("workspace_size_MB", "workspace size for cudnn, in MB, " "workspace is a section of GPU memory which will be " diff --git a/paddle/operators/conv_transpose_op.cc b/paddle/operators/conv_transpose_op.cc index cae0e2ca2b472818463e109cb700da7f62180926..5e24fc4b2c8b479caac417c957033f7552e1c3f0 100644 --- a/paddle/operators/conv_transpose_op.cc +++ b/paddle/operators/conv_transpose_op.cc @@ -29,6 +29,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { auto filter_dims = ctx->GetInputDim("Filter"); std::vector strides = ctx->Attrs().Get>("strides"); std::vector paddings = ctx->Attrs().Get>("paddings"); + std::vector dilations = ctx->Attrs().Get>("dilations"); PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5, "ConvTransposeOp intput should be 4-D or 5-D tensor."); @@ -41,14 +42,18 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ(paddings.size(), strides.size(), "ConvTransposeOp paddings dimension and strides " "dimension should be the same."); + PADDLE_ENFORCE_EQ(paddings.size(), dilations.size(), + "ConvTransposeOp paddings dimension and dilations " + "dimension should be the same."); PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[0], "In ConvTransposeOp, The input channel should be the same " "as the number of filters."); std::vector output_shape({in_dims[0], filter_dims[1]}); for (size_t i = 0; i < strides.size(); ++i) { + auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1; output_shape.push_back((in_dims[i + 2] - 1) * strides[i] - 2 * paddings[i] + - filter_dims[i + 2]); + filter_extent); } ctx->SetOutputDim("Output", framework::make_ddim(output_shape)); } @@ -73,6 +78,12 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto, AddOutput("Output", "(Tensor) The output tensor of convolution transpose operator. " "The format of output tensor is also NCHW."); + + AddAttr>("dilations", + "(vector default:{1, 1}), the " + "dilations(h_dilation, w_dilation) of convolution " + "transpose operator.") + .SetDefault({1, 1}); AddAttr>( "strides", "(vector default:{1, 1}), the strides(h_stride, w_stride) of " @@ -87,7 +98,7 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto, Convolution2D Transpose Operator. The convolution transpose operation calculates the output based on the input, filter -and strides, paddings, groups parameters. The size of each dimension of the +and dilations, strides, paddings, groups parameters. The size of each dimension of the parameters is checked in the infer-shape. Input(Input) and output(Output) are in NCHW format. Where N is batchsize, C is the number of channels, H is the height of the feature, and W is the width of the feature. @@ -136,6 +147,13 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(OpProto* proto, "Where N is batch size, C is " "the number of channels, D is the depth of the feature, H is the " "height of the feature, and W is the width of the feature."); + + AddAttr>( + "dilations", + "(vector default:{1, 1, 1}), the " + "dilations(d_dilation,h_dilation, w_dilation) of convolution " + "transpose operator.") + .SetDefault({1, 1, 1}); AddAttr>("strides", "(vector default:{1, 1, 1}), the " "strides{d_stride, h_stride, w_stride} of " @@ -149,7 +167,7 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(OpProto* proto, Convolution3D Transpose Operator. The convolution transpose operation calculates the output based on the input, filter -and strides, paddings, groups parameters. The size of each dimension of the +and dilations, strides, paddings, groups parameters. The size of each dimension of the parameters is checked in the infer-shape. Input(Input) and output(Output) are in NCDHW format. Where N is batch size, C is the number of channels, D is the depth of the feature, H is the height of the feature, diff --git a/paddle/operators/conv_transpose_op.h b/paddle/operators/conv_transpose_op.h index e81651f417a5b96a1f25ae9ca9228d332a16bc6d..4c8f8a80672788e8b2919e500d3627adec1ad035 100644 --- a/paddle/operators/conv_transpose_op.h +++ b/paddle/operators/conv_transpose_op.h @@ -61,6 +61,7 @@ class GemmConvTransposeKernel : public framework::OpKernel { std::vector strides = context.Attr>("strides"); std::vector paddings = context.Attr>("paddings"); + std::vector dilations = context.Attr>("dilations"); // groups will alway be disabled in conv2dtranspose. const int batch_size = static_cast(input->dims()[0]); @@ -113,7 +114,6 @@ class GemmConvTransposeKernel : public framework::OpKernel { math::Col2ImFunctor col2im; math::Col2VolFunctor col2vol; - std::vector dilations({1, 1, 1}); // convolution transpose: gemm + col2im or col2vol (similar to conv-backward // on input) @@ -165,6 +165,7 @@ class GemmConvTransposeGradKernel : public framework::OpKernel { std::vector strides = context.Attr>("strides"); std::vector paddings = context.Attr>("paddings"); + std::vector dilations = context.Attr>("dilations"); const int batch_size = static_cast(input->dims()[0]); @@ -219,7 +220,6 @@ class GemmConvTransposeGradKernel : public framework::OpKernel { math::Im2ColFunctor im2col; math::Vol2ColFunctor vol2col; - std::vector dilations({1, 1, 1}); if (input_grad) { input_grad->mutable_data(context.GetPlace()); diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc index 3e828f84d076f1afc841db2c1664f5f5d9c90dc6..b4ae1de876010effff6bf577a4e33043f6760a4f 100644 --- a/paddle/operators/fill_zeros_like_op.cc +++ b/paddle/operators/fill_zeros_like_op.cc @@ -24,10 +24,10 @@ class FillZerosLikeOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of FillZerosLikeOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Y"), - "Output(Y) of FillZerosLikeOp should not be null."); - ctx->SetOutputDim("Y", ctx->GetInputDim("X")); - ctx->ShareLoD("X", /*->*/ "Y"); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of FillZerosLikeOp should not be null."); + ctx->SetOutputDim("Out", ctx->GetInputDim("X")); + ctx->ShareLoD("X", /*->*/ "Out"); } }; @@ -36,7 +36,7 @@ class FillZerosLikeOpMaker : public framework::OpProtoAndCheckerMaker { FillZerosLikeOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input of fill-zeros-like op."); - AddOutput("Y", "The variable will be filled up with zeros."); + AddOutput("Out", "The variable will be filled up with zeros."); AddComment(R"DOC( FillZerosLike Operator. diff --git a/paddle/operators/fill_zeros_like_op.h b/paddle/operators/fill_zeros_like_op.h index a6e2941f52150de7886717303d2cb2f10b7eef7b..351ecf8b2f1d945fabdd1d6c5ed56f76f3caae61 100644 --- a/paddle/operators/fill_zeros_like_op.h +++ b/paddle/operators/fill_zeros_like_op.h @@ -23,7 +23,7 @@ template class FillZerosLikeKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* out = context.Output("Y"); + auto* out = context.Output("Out"); out->mutable_data(context.GetPlace()); math::SetConstant setter; diff --git a/paddle/operators/increment_op.cc b/paddle/operators/increment_op.cc index 3a53ea89dc9a73d170ca5a9fe6cffde02a9f283b..789c92102d63355a80c3330f2107c731206397f4 100644 --- a/paddle/operators/increment_op.cc +++ b/paddle/operators/increment_op.cc @@ -93,13 +93,13 @@ class IncrementGradOpMaker : public framework::SingleGradOpDescMaker { public: using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; - std::unique_ptr Apply() const override { - auto *grad_op = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto *grad_op = new framework::OpDesc(); grad_op->SetType("increment"); grad_op->SetInput("X", Output("Out")); grad_op->SetOutput("Out", Input("X")); grad_op->SetAttr("step", -boost::get(GetAttr("step"))); - return std::unique_ptr(grad_op); + return std::unique_ptr(grad_op); } }; diff --git a/paddle/operators/lod_rank_table_op.cc b/paddle/operators/lod_rank_table_op.cc index 46577d0c5821a1738bd050815f46776591bdfdde..2d67046bfee01d8d148da1c8b705d3ad959a4839 100644 --- a/paddle/operators/lod_rank_table_op.cc +++ b/paddle/operators/lod_rank_table_op.cc @@ -63,8 +63,8 @@ class LoDRankTableInferShape : public framework::InferShapeBase { class LoDRankTableInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDescBind &op_desc, - framework::BlockDescBind *block) const override { + void operator()(const framework::OpDesc &op_desc, + framework::BlockDesc *block) const override { for (auto &o : op_desc.Output("Out")) { block->FindRecursiveOrCreateVar(o)->SetType( framework::proto::VarDesc::LOD_RANK_TABLE); diff --git a/paddle/operators/lod_tensor_to_array_op.cc b/paddle/operators/lod_tensor_to_array_op.cc index 33af0e819f757b574b1a4cc3426a8375bdd702c6..643f8859f3d0d44c0b5be922bd786ab04093df94 100644 --- a/paddle/operators/lod_tensor_to_array_op.cc +++ b/paddle/operators/lod_tensor_to_array_op.cc @@ -127,8 +127,8 @@ class LoDTensorToArrayInferShape : public framework::InferShapeBase { class LoDTensorToArrayInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDescBind &op_desc, - framework::BlockDescBind *block) const override { + void operator()(const framework::OpDesc &op_desc, + framework::BlockDesc *block) const override { for (auto &out_var : op_desc.Output("Out")) { block->Var(out_var)->SetType(framework::proto::VarDesc::LOD_TENSOR_ARRAY); } @@ -140,14 +140,14 @@ class LoDTensorToArrayGradMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - std::unique_ptr Apply() const override { - auto *grad_op = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto *grad_op = new framework::OpDesc(); grad_op->SetType("array_to_lod_tensor"); grad_op->SetInput("X", OutputGrad("Out")); grad_op->SetInput("RankTable", Input("RankTable")); grad_op->SetOutput("Out", InputGrad("X")); grad_op->SetAttrMap(Attrs()); - return std::unique_ptr(grad_op); + return std::unique_ptr(grad_op); } }; diff --git a/paddle/operators/lookup_table_op.cc b/paddle/operators/lookup_table_op.cc index 606b44808edf11a72eb154cbdc8356bb1fc9b9d5..0a9defa8c50453abf3eefdcb89126b1349d6d756 100644 --- a/paddle/operators/lookup_table_op.cc +++ b/paddle/operators/lookup_table_op.cc @@ -108,8 +108,8 @@ class LookupTableOpGrad : public framework::OperatorWithKernel { class LookupTableOpGradVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDescBind& op_desc, - framework::BlockDescBind* block) const override { + void operator()(const framework::OpDesc& op_desc, + framework::BlockDesc* block) const override { auto out_var_name = op_desc.Output(framework::GradVarName("W")).front(); auto attr = op_desc.GetAttr("is_sparse"); bool is_sparse = boost::get(attr); diff --git a/paddle/operators/math/math_function_impl.h b/paddle/operators/math/math_function_impl.h index 3e6d83386589a02c7d8f62394c1c2becb606504c..aced2690bce9a4c2db6309f18e23a8f6cd0211f3 100644 --- a/paddle/operators/math/math_function_impl.h +++ b/paddle/operators/math/math_function_impl.h @@ -67,18 +67,45 @@ void RowwiseAdd::operator()(const DeviceContext& context, template void ColwiseSum::operator()(const DeviceContext& context, const framework::Tensor& input, - framework::Tensor* vector) { + framework::Tensor* out) { auto in_dims = input.dims(); auto size = input.numel() / in_dims[0]; - PADDLE_ENFORCE_EQ(vector->numel(), size); + PADDLE_ENFORCE_EQ(out->numel(), size); - auto vec = framework::EigenMatrix::From(*vector); auto in = framework::EigenMatrix::From(input); - Eigen::array shape({{1, static_cast(size)}}); - vec.reshape(shape).device(*context.eigen_device()) = - in.sum(Eigen::array({{0}})).reshape(shape); + auto vec = framework::EigenVector::Flatten(*out); + + vec.device(*context.eigen_device()) = in.sum(Eigen::array({{0}})); } +// Specialize for CPU, since Eigen implement a general reduce. However, +// colwise-sum can be easily implemented. General reduce has a huge overhead in +// CPU +template +class ColwiseSum { + public: + void operator()(const platform::CPUDeviceContext& context, + const framework::Tensor& input, framework::Tensor* out) { + auto& in_dims = input.dims(); + auto height = in_dims[0]; + auto size = in_dims[1]; + PADDLE_ENFORCE_EQ(out->numel(), size); + + T* out_buf = out->mutable_data(out->place()); + const T* in_buf = input.data(); + + for (size_t i = 0; i < height; ++i) { + for (size_t j = 0; j < size; ++j) { + if (i == 0) { + out_buf[j] = in_buf[i * size + j]; + } else { + out_buf[j] += in_buf[i * size + j]; + } + } + } + } +}; + } // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/operators/mean_op.cc b/paddle/operators/mean_op.cc index e27f9eeac6e7cdc0615f1368b21df252866d9b7e..411f4d14efbfa5a8ee6dd7da645a044b191bf006 100644 --- a/paddle/operators/mean_op.cc +++ b/paddle/operators/mean_op.cc @@ -60,13 +60,13 @@ class MeanGradMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - std::unique_ptr Apply() const override { - auto* grad_op = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto* grad_op = new framework::OpDesc(); grad_op->SetType("mean_grad"); grad_op->SetInput("X", Input("X")); grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X")); - return std::unique_ptr(grad_op); + return std::unique_ptr(grad_op); } }; diff --git a/paddle/operators/merge_lod_tensor_op.cc b/paddle/operators/merge_lod_tensor_op.cc index ec76cfdf279c93f28cf54c744eafe07b3957c06b..5edf29c3af958f5a939fdb830d46aca4b8d3dbe0 100644 --- a/paddle/operators/merge_lod_tensor_op.cc +++ b/paddle/operators/merge_lod_tensor_op.cc @@ -161,15 +161,15 @@ class MergeLoDTensorGradMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - std::unique_ptr Apply() const override { - auto *grad_op = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto *grad_op = new framework::OpDesc(); grad_op->SetType("split_lod_tensor"); grad_op->SetInput("X", OutputGrad("Out")); grad_op->SetInput("Mask", Input("Mask")); grad_op->SetOutput("OutTrue", InputGrad("InTrue")); grad_op->SetOutput("OutFalse", InputGrad("InFalse")); grad_op->SetAttrMap(Attrs()); - return std::unique_ptr(grad_op); + return std::unique_ptr(grad_op); } }; diff --git a/paddle/operators/minus_op.cc b/paddle/operators/minus_op.cc index eb65fededfd633de51b0c418b78aab0726e70320..2e9cc9d29d8c92ac56b451834f930758216e6a44 100644 --- a/paddle/operators/minus_op.cc +++ b/paddle/operators/minus_op.cc @@ -70,12 +70,11 @@ class MinusGradMaker : public framework::GradOpDescMakerBase { public: using framework::GradOpDescMakerBase::GradOpDescMakerBase; - std::vector> operator()() - const override { - std::vector> ops; + std::vector> operator()() const override { + std::vector> ops; auto x_g = InputGrad("X"); if (!x_g.empty()) { - auto *x_g_op = new framework::OpDescBind(); + auto *x_g_op = new framework::OpDesc(); x_g_op->SetType("scale"); x_g_op->SetInput("X", OutputGrad("Out")); x_g_op->SetOutput("Out", x_g); @@ -85,7 +84,7 @@ class MinusGradMaker : public framework::GradOpDescMakerBase { auto y_g = InputGrad("Y"); if (!y_g.empty()) { - auto *y_g_op = new framework::OpDescBind(); + auto *y_g_op = new framework::OpDesc(); y_g_op->SetType("scale"); y_g_op->SetInput("X", OutputGrad("Out")); y_g_op->SetOutput("Out", y_g); diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc index a4bf0711de0efe0967b1211b7f32e5e2245860bc..599df9c3df58db6444d7cb729e1a2c1f9f628b5b 100644 --- a/paddle/operators/mul_op.cc +++ b/paddle/operators/mul_op.cc @@ -73,39 +73,50 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker { public: MulOpMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "The first input of mul op"); - AddInput("Y", "The second input of mul op"); - AddOutput("Out", "The output of mul op"); + AddInput("X", "(Tensor), The first input tensor of mul op."); + AddInput("Y", "(Tensor), The second input tensor of mul op."); + AddOutput("Out", "(Tensor), The output tensor of mul op."); AddAttr( "x_num_col_dims", - "(int, default 1) " - R"DOC(mul_op can take tensors with more than two dimensions as input `X`, - in that case, tensors will be reshaped to a matrix. The matrix's first - dimension(column length) will be the product of tensor's last - `num_col_dims` dimensions, and the matrix's second dimension(row length) - will be the product of tensor's first `rank - num_col_dims` dimensions. + R"DOC((int, default 1), The mul_op can take tensors with more than two + dimensions as its inputs. If the input $X$ is a tensor with more + than two dimensions, $X$ will be flattened into a two-dimensional + matrix first. The flattening rule is: the first `num_col_dims` + will be flattened to form the first dimension of the final matrix + (the height of the matrix), and the rest `rank(X) - num_col_dims` + dimensions are flattened to form the second dimension of the final + matrix (the width of the matrix). As a result, height of the + flattened matrix is equal to the product of $X$'s first + `x_num_col_dims` dimensions' sizes, and width of the flattened + matrix is equal to the product of $X$'s last `rank(x) - num_col_dims` + dimensions' size. For example, suppose $X$ is a 6-dimensional + tensor with the shape [2, 3, 4, 5, 6], and `x_num_col_dims` = 3. + Thus, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = + [24, 30]. )DOC") .SetDefault(1) .EqualGreaterThan(1); AddAttr( "y_num_col_dims", - "(int, default 1) " - R"DOC(mul_op can take tensors with more than two dimensions as input `Y`, - in that case, tensors will be reshaped to a matrix. Just like input `X`. + R"DOC((int, default 1), The mul_op can take tensors with more than two, + dimensions as its inputs. If the input $Y$ is a tensor with more + than two dimensions, $Y$ will be flattened into a two-dimensional + matrix first. The attribute `y_num_col_dims` determines how $Y$ is + flattened. See comments of `x_num_col_dims` for more details. )DOC") .SetDefault(1) .EqualGreaterThan(1); AddComment(R"DOC( -Mul Operator. +Mul Operator. -This operator is used to perform matrix multiplication for input X and Y. +This operator is used to perform matrix multiplication for input $X$ and $Y$. The equation is: $$Out = X * Y$$ -Both the input `X` and `Y` can carry the LoD (Level of Details) information, -or not. But the output only shares the LoD information with input `X`. +Both the input $X$ and $Y$ can carry the LoD (Level of Details) information, +or not. But the output only shares the LoD information with input $X$. )DOC"); } diff --git a/paddle/operators/nccl_op_test.cu.cc b/paddle/operators/nccl_op_test.cu.cc index d747cc0cf5f74b886bbd40549673e7d64de952e9..c1046aadafbde303a3a8b12f2377018396b9adb8 100644 --- a/paddle/operators/nccl_op_test.cu.cc +++ b/paddle/operators/nccl_op_test.cu.cc @@ -65,7 +65,7 @@ class NCCLTester : public ::testing::Test { } void NCCLInitOp() { - std::unique_ptr op1(new f::OpDescBind); + std::unique_ptr op1(new f::OpDesc); op1->SetType("ncclInit"); op1->SetOutput("Communicator", {"comm"}); @@ -81,10 +81,9 @@ class NCCLTester : public ::testing::Test { } template - void PerThreadProgram(int gpu_id, const f::OpDescBind &op_desc, - f::Scope *scope) { + void PerThreadProgram(int gpu_id, const f::OpDesc &op_desc, f::Scope *scope) { std::unique_lock lk(mu); - const f::OpDescBind *op1 = &op_desc; + const f::OpDesc *op1 = &op_desc; p::GPUPlace place(gpu_id); auto &ctx = dev_ctxs.at(gpu_id); @@ -125,7 +124,7 @@ class NCCLTester : public ::testing::Test { // ncclInitOp with desc TEST(NCCL, ncclInitOp) { - std::unique_ptr op_desc(new f::OpDescBind); + std::unique_ptr op_desc(new f::OpDesc); op_desc->SetType("ncclInit"); op_desc->SetOutput("Communicator", {"x1"}); @@ -145,7 +144,7 @@ TEST(NCCL, ncclInitOp) { // ncclAllReduceOp with desc TEST_F(NCCLTester, ncclAllReduceOp) { - std::unique_ptr op2(new f::OpDescBind); + std::unique_ptr op2(new f::OpDesc); op2->SetType("ncclAllReduce"); op2->SetInput("X", {"st"}); op2->SetInput("Communicator", {"comm"}); @@ -192,7 +191,7 @@ TEST_F(NCCLTester, ncclAllReduceOp) { // ncclReduceOp with desc TEST_F(NCCLTester, ncclReduceOp) { - std::unique_ptr op2(new f::OpDescBind); + std::unique_ptr op2(new f::OpDesc); const int kRoot = 0; op2->SetType("ncclReduce"); op2->SetInput("X", {"st"}); @@ -240,7 +239,7 @@ TEST_F(NCCLTester, ncclReduceOp) { // ncclBcastOp with desc TEST_F(NCCLTester, ncclBcastOp) { - std::unique_ptr op2(new f::OpDescBind); + std::unique_ptr op2(new f::OpDesc); const int kRoot = 5; op2->SetType("ncclBcast"); op2->SetInput("X", {"st"}); diff --git a/paddle/operators/pad_op.cc b/paddle/operators/pad_op.cc index 8d2d031fcdb6bc4ee955b6db6df2892aa7ee5d53..40f7a7eed53354fa65373830b0972c0e72ef54da 100644 --- a/paddle/operators/pad_op.cc +++ b/paddle/operators/pad_op.cc @@ -116,14 +116,14 @@ class PadOpGradMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - std::unique_ptr Apply() const override { - auto* bind = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto* bind = new framework::OpDesc(); bind->SetInput("X", Input("X")); bind->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); bind->SetOutput(framework::GradVarName("X"), InputGrad("X")); bind->SetAttrMap(Attrs()); bind->SetType("pad_grad"); - return std::unique_ptr(bind); + return std::unique_ptr(bind); } }; diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index ca3a063553d5c8eae3d62d63061bb13a4b3f8365..5981d5745d24e0b2fe68bf8b9852cb8a6094885f 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -234,7 +234,7 @@ class RecurrentOp : public RecurrentBase { auto reverse = Attr(kReverse); framework::Executor executor(dev_ctx); - auto *block = Attr(kStepBlock); + auto *block = Attr(kStepBlock); auto *program = block->Program(); for (size_t i = 0; i < seq_len; ++i) { @@ -317,7 +317,7 @@ class RecurrentGradOp : public RecurrentBase { auto reverse = Attr(kReverse); framework::Executor executor(dev_ctx); - auto *block = Attr(kStepBlock); + auto *block = Attr(kStepBlock); auto *program = block->Program(); for (size_t step_id = 0; step_id < seq_len; ++step_id) { @@ -522,8 +522,7 @@ The ex-state means the state value in the ex-timestep or the previous time step string::Sprintf( "The state variable names. [%s, %s, %s] must be the same order", kExStates, kStates, kInitStateGrads)); - AddAttr(kStepBlock, - "The step block inside RNN"); + AddAttr(kStepBlock, "The step block inside RNN"); AddAttr(kReverse, R"DOC(Calculate RNN reversely or not. By default reverse=False @@ -565,13 +564,13 @@ class RecurrentGradOpDescMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - virtual std::unique_ptr Apply() const { - auto *grad = new framework::OpDescBind(); + virtual std::unique_ptr Apply() const { + auto *grad = new framework::OpDesc(); grad->SetType("recurrent_grad"); for (auto &input_param : this->InputNames()) { grad->SetInput(input_param, this->Input(input_param)); grad->SetOutput(framework::GradVarName(input_param), - this->InputGrad(input_param)); + this->InputGrad(input_param, false)); } for (auto &output_param : this->OutputNames()) { @@ -588,7 +587,7 @@ class RecurrentGradOpDescMaker : public framework::SingleGradOpDescMaker { grad->SetAttrMap(this->Attrs()); grad->SetBlockAttr(kStepBlock, *grad_block_[0]); - return std::unique_ptr(grad); + return std::unique_ptr(grad); } }; diff --git a/paddle/operators/scale_op.cc b/paddle/operators/scale_op.cc index 98170c0d1b22fd2cbc57c29295e6887c1bb1fa68..ee39888713544703ee8d305b2c04e4b03deeceab 100644 --- a/paddle/operators/scale_op.cc +++ b/paddle/operators/scale_op.cc @@ -58,13 +58,13 @@ class ScaleGradMaker : public framework::SingleGradOpDescMaker { public: using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; - std::unique_ptr Apply() const override { - auto *grad_op = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto *grad_op = new framework::OpDesc(); grad_op->SetType("scale"); grad_op->SetInput("X", OutputGrad("Out")); grad_op->SetOutput("Out", InputGrad("X")); grad_op->SetAttr("scale", GetAttr("scale")); - return std::unique_ptr(grad_op); + return std::unique_ptr(grad_op); } }; diff --git a/paddle/operators/sequence_concat_op.cc b/paddle/operators/sequence_concat_op.cc index 54e8989f256e61ce9d4f76b631a3906773d04a2e..2f0aad2003e48952ca26ca27573bc45386a4e585 100644 --- a/paddle/operators/sequence_concat_op.cc +++ b/paddle/operators/sequence_concat_op.cc @@ -67,12 +67,12 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker { "The level should be less than the level number of inputs.") .SetDefault(0); AddComment(R"DOC( -The sequence_concat operator concatenates multiple LoDTensors. -It only supports sequence (LoD Tensor with level number is 1) +The sequence_concat operator concatenates multiple LoDTensors. +It only supports sequence (LoD Tensor with level number is 1) or a nested sequence (LoD tensor with level number is 2) as its input. - Case1: If the axis is other than 0(here, axis is 1 and level is 1), - each input should have the same LoD information and the LoD + each input should have the same LoD information and the LoD information of the output keeps the same as the input. LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4) @@ -80,7 +80,7 @@ or a nested sequence (LoD tensor with level number is 2) as its input. LoD(Out) = {{0,2,4}, {0,1,2,3,4}}; Dims(Out) = (4,7,4) - Case2: - If the axis is 0(here, leve is 0), the inputs are concatenated along + If the axis is 0(here, leve is 0), the inputs are concatenated along time steps, the LoD information of the output need to re-compute. The LoD information of level-1 should be same. @@ -124,8 +124,9 @@ class SequenceConcatGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(sequence_concat, ops::SequenceConcatOp, ops::SequenceConcatOpMaker, - sequence_concat_grad, ops::SequenceConcatGradOp); +REGISTER_OP_EX(sequence_concat, ops::SequenceConcatOp, + ops::SequenceConcatOpMaker, sequence_concat_grad, + ops::SequenceConcatGradOp, false); REGISTER_OP_CPU_KERNEL( sequence_concat, ops::SequenceConcatOpKernel); diff --git a/paddle/operators/shrink_rnn_memory_op.cc b/paddle/operators/shrink_rnn_memory_op.cc index 92dbe126bc084a4e1ead48f2e2334447480362bb..48194a547bbea5ddda7c5f3e2421431d1d81042d 100644 --- a/paddle/operators/shrink_rnn_memory_op.cc +++ b/paddle/operators/shrink_rnn_memory_op.cc @@ -136,14 +136,14 @@ class ShrinkRNNGradOpMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - std::unique_ptr Apply() const override { - auto *op = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto *op = new framework::OpDesc(); op->SetType("shrink_rnn_memory_grad"); op->SetInput("X", Input("X")); op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); op->SetOutput(framework::GradVarName("X"), InputGrad("X")); op->SetAttrMap(Attrs()); - return std::unique_ptr(op); + return std::unique_ptr(op); } }; diff --git a/paddle/operators/sign_op.cc b/paddle/operators/sign_op.cc index b2bfce71a6c3b7153aa10521cbb55cedbc0d7940..b2459fb2f53939b3131af1540044ce361b87d08a 100644 --- a/paddle/operators/sign_op.cc +++ b/paddle/operators/sign_op.cc @@ -50,13 +50,13 @@ class SignGradMaker : public framework::SingleGradOpDescMaker { public: using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; - std::unique_ptr Apply() const override { - auto *grad_op = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto *grad_op = new framework::OpDesc(); grad_op->SetType("scale"); grad_op->SetInput("X", OutputGrad("Out")); grad_op->SetOutput("Out", InputGrad("X")); grad_op->SetAttr("scale", 0.0f); - return std::unique_ptr(grad_op); + return std::unique_ptr(grad_op); } }; diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc index bca3ff1562d881be1d31e56270d22252e5e491e1..d9911a6901447d8900c3881a60c7a0852dcbf429 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -173,8 +173,8 @@ class SoftmaxGradMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - std::unique_ptr Apply() const override { - auto* grad_op = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto* grad_op = new framework::OpDesc(); grad_op->SetType("softmax_with_cross_entropy_grad"); grad_op->SetInput("Label", Input("Label")); grad_op->SetInput("Softmax", Output("Softmax")); @@ -183,7 +183,7 @@ class SoftmaxGradMaker : public framework::SingleGradOpDescMaker { grad_op->SetInput(framework::GradVarName("Loss"), OutputGrad("Loss")); grad_op->SetOutput(framework::GradVarName("Logits"), InputGrad("Logits")); grad_op->SetAttrMap(Attrs()); - return std::unique_ptr(grad_op); + return std::unique_ptr(grad_op); } }; diff --git a/paddle/operators/split_lod_tensor_op.cc b/paddle/operators/split_lod_tensor_op.cc index c83b0cbad7f7e9115e7ce47a1823987830c0c086..3542d8624fec49f75314f046434cbcadf307497e 100644 --- a/paddle/operators/split_lod_tensor_op.cc +++ b/paddle/operators/split_lod_tensor_op.cc @@ -163,8 +163,8 @@ class SplitLoDTensorArrayGradMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - std::unique_ptr Apply() const override { - auto *grad_op = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto *grad_op = new framework::OpDesc(); grad_op->SetType("merge_lod_tensor"); grad_op->SetInput("InTrue", OutputGrad("OutTrue")); grad_op->SetInput("InFalse", OutputGrad("OutFalse")); @@ -172,7 +172,7 @@ class SplitLoDTensorArrayGradMaker : public framework::SingleGradOpDescMaker { grad_op->SetInput("X", Input("X")); grad_op->SetOutput("Out", InputGrad("X")); grad_op->SetAttrMap(Attrs()); - return std::unique_ptr(grad_op); + return std::unique_ptr(grad_op); } }; diff --git a/paddle/operators/split_op.cc b/paddle/operators/split_op.cc index e8c5fffcd2cdf9f1b31a6c343e4820f155d595f7..4dfae043cb1091c9491d89aec4d1415d4741e013 100644 --- a/paddle/operators/split_op.cc +++ b/paddle/operators/split_op.cc @@ -108,13 +108,13 @@ class SplitGradMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - std::unique_ptr Apply() const override { - auto op = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto op = new framework::OpDesc(); op->SetType("concat"); op->SetInput("X", OutputGrad("Out")); op->SetOutput("Out", InputGrad("X")); op->SetAttrMap(Attrs()); - return std::unique_ptr(op); + return std::unique_ptr(op); } }; diff --git a/paddle/operators/strided_memcpy_test.cc b/paddle/operators/strided_memcpy_test.cc index 68f064eaee5851333ddf9767b7138da83a28503d..230cc1ab0bbd5ac57eb7494795e3fbcdf02c3cc8 100644 --- a/paddle/operators/strided_memcpy_test.cc +++ b/paddle/operators/strided_memcpy_test.cc @@ -85,8 +85,10 @@ TEST(StridedMemcpy, GPUCrop) { platform::GPUPlace gpu0(0); platform::CPUPlace cpu; + platform::CUDADeviceContext ctx(gpu0); + int* gpu_src = reinterpret_cast(memory::Alloc(gpu0, sizeof(src))); - memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src)); + memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src), ctx.stream()); framework::DDim src_stride({5, 1}); @@ -96,7 +98,6 @@ TEST(StridedMemcpy, GPUCrop) { framework::DDim dst_dim({2, 2}); framework::DDim dst_stride({2, 1}); - platform::CUDADeviceContext ctx(gpu0); StridedMemcpy(ctx, gpu_src + 1, src_stride, dst_dim, dst_stride, gpu_dst); @@ -122,9 +123,10 @@ TEST(StridedMemcpy, GPUConcat) { platform::GPUPlace gpu0(0); platform::CPUPlace cpu; + platform::CUDADeviceContext ctx(gpu0); int* gpu_src = reinterpret_cast(memory::Alloc(gpu0, sizeof(src))); - memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src)); + memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src), ctx.stream()); int dst[8]; int* gpu_dst = reinterpret_cast(memory::Alloc(gpu0, sizeof(dst))); @@ -132,7 +134,6 @@ TEST(StridedMemcpy, GPUConcat) { framework::DDim src_stride({2, 1}); framework::DDim dst_dim({2, 2}); framework::DDim dst_stride({4, 1}); - platform::CUDADeviceContext ctx(gpu0); StridedMemcpy(ctx, gpu_src, src_stride, dst_dim, dst_stride, gpu_dst); StridedMemcpy(ctx, gpu_src, src_stride, dst_dim, dst_stride, diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index c56fc1f10b58c1678743aff277f28064dff0f28d..891839bf9cd991e15d96b86e24ea61b09e35a7c7 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -106,8 +106,8 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( Sum operator. -This operators sums the input tensors. All the inputs can carry the -LoD (Level of Details) information. However, the output only shares +This operators sums the input tensors. All the inputs can carry the +LoD (Level of Details) information. However, the output only shares the LoD information with the first input. )DOC"); } @@ -115,8 +115,8 @@ the LoD information with the first input. class SumOpVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDescBind& op_desc, - framework::BlockDescBind* block) const override { + void operator()(const framework::OpDesc& op_desc, + framework::BlockDesc* block) const override { auto& inputs = op_desc.Input("X"); auto var_type = framework::proto::VarDesc::SELECTED_ROWS; @@ -169,20 +169,19 @@ class SumGradMaker : public framework::GradOpDescMakerBase { public: using framework::GradOpDescMakerBase::GradOpDescMakerBase; - std::vector> operator()() - const override { - auto x_grads = InputGrad("X"); - std::vector> grad_ops; + std::vector> operator()() const override { + auto x_grads = InputGrad("X", false); + std::vector> grad_ops; grad_ops.reserve(x_grads.size()); auto og = OutputGrad("Out"); std::transform(x_grads.begin(), x_grads.end(), std::back_inserter(grad_ops), [&og](const std::string& x_grad) { - auto* grad_op = new framework::OpDescBind(); + auto* grad_op = new framework::OpDesc(); grad_op->SetType("scale"); grad_op->SetInput("X", og); grad_op->SetOutput("Out", {x_grad}); grad_op->SetAttr("scale", 1.0f); - return std::unique_ptr(grad_op); + return std::unique_ptr(grad_op); }); return grad_ops; } diff --git a/paddle/operators/tensor_array_read_write_op.cc b/paddle/operators/tensor_array_read_write_op.cc index 337b7555c7f077bdebed46ef7b5d8b6fa0ce0363..90cbc19d1b1bab2e639e3d6d5b28cd13b30542f6 100644 --- a/paddle/operators/tensor_array_read_write_op.cc +++ b/paddle/operators/tensor_array_read_write_op.cc @@ -96,8 +96,8 @@ class WriteToArrayInferShape : public framework::InferShapeBase { class WriteToArrayInferVarType : public framework::VarTypeInference { public: - void operator()(const framework::OpDescBind &op_desc, - framework::BlockDescBind *block) const override { + void operator()(const framework::OpDesc &op_desc, + framework::BlockDesc *block) const override { auto x_name = op_desc.Input("X")[0]; auto out_name = op_desc.Output("Out")[0]; VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY"; @@ -175,14 +175,14 @@ class WriteToArrayGradMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - std::unique_ptr Apply() const override { - auto *grad_op = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto *grad_op = new framework::OpDesc(); grad_op->SetType("read_from_array"); grad_op->SetInput("I", Input("I")); grad_op->SetInput("X", OutputGrad("Out")); grad_op->SetOutput("Out", InputGrad("X")); grad_op->SetAttrMap(Attrs()); - return std::unique_ptr(grad_op); + return std::unique_ptr(grad_op); } }; @@ -191,14 +191,14 @@ class ReadFromArrayGradMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - std::unique_ptr Apply() const override { - auto *grad_op = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto *grad_op = new framework::OpDesc(); grad_op->SetType("write_to_array"); grad_op->SetInput("I", Input("I")); grad_op->SetInput("X", OutputGrad("Out")); grad_op->SetOutput("Out", InputGrad("X")); grad_op->SetAttrMap(Attrs()); - return std::unique_ptr(grad_op); + return std::unique_ptr(grad_op); } }; diff --git a/paddle/operators/while_op.cc b/paddle/operators/while_op.cc index 56a01e56d75a125a86e3d3a3702bf0bb64552b65..324c8b98c4811328b2a89eadc3e3420c080bd7d1 100644 --- a/paddle/operators/while_op.cc +++ b/paddle/operators/while_op.cc @@ -46,7 +46,7 @@ class WhileOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ(cond.dims(), paddle::framework::make_ddim({1})); framework::Executor executor(dev_ctx); - auto *block = Attr(kStepBlock); + auto *block = Attr(kStepBlock); auto *program = block->Program(); auto step_scopes = @@ -82,8 +82,8 @@ class WhileOpMaker : public framework::OpProtoAndCheckerMaker { "(StepScopeVar) A vector of local scope, which size equals the " "step number of While Op. The i'th scope storages temporary " "variables generated in the i'th step."); - AddAttr(kStepBlock, - "The step block inside WhileOp"); + AddAttr(kStepBlock, + "The step block inside WhileOp"); AddComment(R"DOC( )DOC"); } @@ -99,7 +99,7 @@ class WhileGradOp : public framework::OperatorBase { void Run(const framework::Scope &scope, const platform::DeviceContext &dev_ctx) const override { framework::Executor executor(dev_ctx); - auto *block = Attr(kStepBlock); + auto *block = Attr(kStepBlock); auto *program = block->Program(); auto *step_scopes = @@ -209,8 +209,8 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker { using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; protected: - std::unique_ptr Apply() const override { - auto *grad = new framework::OpDescBind(); + std::unique_ptr Apply() const override { + auto *grad = new framework::OpDesc(); grad->SetType("while_grad"); grad->SetInput(kParameters, Input(kParameters)); @@ -279,14 +279,14 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker { // while operator could be renamed. grad->SetAttr("original_output_grad", extra_inputs_list); - return std::unique_ptr(grad); + return std::unique_ptr(grad); } }; class WhileGradOpVarTypeInference : public framework::VarTypeInference { public: - void operator()(const framework::OpDescBind &op_desc, - framework::BlockDescBind *block) const override { + void operator()(const framework::OpDesc &op_desc, + framework::BlockDesc *block) const override { auto p_names = op_desc.Input(kParameters); auto pg_names = op_desc.Output(framework::GradVarName(kParameters)); diff --git a/paddle/platform/gpu_info.cc b/paddle/platform/gpu_info.cc index 541eca5f39c2e6a4b464aec79fd8a920ab4c7732..7037551d7544d6fea54e2f4bf887309b7dc5a52e 100644 --- a/paddle/platform/gpu_info.cc +++ b/paddle/platform/gpu_info.cc @@ -97,17 +97,6 @@ void GpuMemcpyAsync(void *dst, const void *src, size_t count, "cudaMemcpyAsync failed in paddle::platform::GpuMemcpyAsync"); } -void GpuMemcpySync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind) { - PADDLE_ENFORCE(cudaMemcpy(dst, src, count, kind), - "cudaMemcpy failed in paddle::platform::GpuMemcpySync"); - // note: cudaMemcpy may actually be asynchronous with respect to the caller, - // block on stream 0 to make sure the copy has completed - PADDLE_ENFORCE( - cudaStreamSynchronize(0), - "cudaStreamSynchronize failed in paddle::platform::GpuMemcpySync"); -} - void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device, size_t count, cudaStream_t stream) { PADDLE_ENFORCE( diff --git a/paddle/platform/gpu_info.h b/paddle/platform/gpu_info.h index db961f3838af73855312d4cf6a80e2355306e08f..d05131fa4196057d19a8ae57bf4574c666e409cf 100644 --- a/paddle/platform/gpu_info.h +++ b/paddle/platform/gpu_info.h @@ -52,10 +52,6 @@ size_t GpuMaxChunkSize(); void GpuMemcpyAsync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream); -//! Copy memory from address src to dst synchronously. -void GpuMemcpySync(void *dst, const void *src, size_t count, - enum cudaMemcpyKind kind); - //! Copy memory from one device to another device. void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device, size_t count, cudaStream_t stream); diff --git a/paddle/platform/transform_test.cu b/paddle/platform/transform_test.cu index d36eac8379ebedb284b36012a46186cd3ac43b91..464096111e4a85b8d64d9223bfb85a1d1d42fad4 100644 --- a/paddle/platform/transform_test.cu +++ b/paddle/platform/transform_test.cu @@ -53,11 +53,11 @@ TEST(Transform, GPUUnary) { CUDADeviceContext ctx(gpu0); float cpu_buf[4] = {0.1, 0.2, 0.3, 0.4}; float* gpu_buf = static_cast(Alloc(gpu0, sizeof(float) * 4)); - Copy(gpu0, gpu_buf, CPUPlace(), cpu_buf, sizeof(cpu_buf)); + Copy(gpu0, gpu_buf, CPUPlace(), cpu_buf, sizeof(cpu_buf), ctx.stream()); Transform trans; trans(ctx, gpu_buf, gpu_buf + 4, gpu_buf, Scale(10)); ctx.Wait(); - Copy(CPUPlace(), cpu_buf, gpu0, gpu_buf, sizeof(cpu_buf)); + Copy(CPUPlace(), cpu_buf, gpu0, gpu_buf, sizeof(cpu_buf), ctx.stream()); Free(gpu0, gpu_buf); for (int i = 0; i < 4; ++i) { ASSERT_NEAR(cpu_buf[i], static_cast(i + 1), 1e-5); @@ -83,11 +83,11 @@ TEST(Transform, GPUBinary) { GPUPlace gpu0(0); CUDADeviceContext ctx(gpu0); int* gpu_buf = static_cast(Alloc(gpu0, sizeof(buf))); - Copy(gpu0, gpu_buf, CPUPlace(), buf, sizeof(buf)); + Copy(gpu0, gpu_buf, CPUPlace(), buf, sizeof(buf), ctx.stream()); Transform trans; trans(ctx, gpu_buf, gpu_buf + 4, gpu_buf, gpu_buf, Multiply()); ctx.Wait(); - Copy(CPUPlace(), buf, gpu0, gpu_buf, sizeof(buf)); + Copy(CPUPlace(), buf, gpu0, gpu_buf, sizeof(buf), ctx.stream()); Free(gpu0, gpu_buf); for (int i = 0; i < 4; ++i) { ASSERT_EQ((i + 1) * (i + 1), buf[i]); diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index de26184d0102574164a0e06da5fd58ef052d2559..88e9cdadd8650504706baf07e8d29fa0663f2905 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -108,21 +108,21 @@ static py::bytes SerializeMessage(T &self) { // Bind Methods void BindProgramDesc(py::module &m) { - py::class_(m, "ProgramDesc", "") + py::class_(m, "ProgramDesc", "") .def(py::init<>()) .def("__init__", - [](ProgramDescBind &self, const ProgramDescBind &other) { - new (&self) ProgramDescBind(other); + [](ProgramDesc &self, const ProgramDesc &other) { + new (&self) ProgramDesc(other); }) .def("__init__", - [](ProgramDescBind &self, const py::bytes &binary_str) { + [](ProgramDesc &self, const py::bytes &binary_str) { std::string str(binary_str); - new (&self) ProgramDescBind(str); + new (&self) ProgramDesc(str); }) - .def("append_block", &ProgramDescBind::AppendBlock, + .def("append_block", &ProgramDesc::AppendBlock, py::return_value_policy::reference) .def("append_backward", - [](ProgramDescBind &program_desc, const VarDescBind &target, + [](ProgramDesc &program_desc, const VarDesc &target, const std::unordered_set &no_grad_vars) { ParamGradInfoMap param_grad_map = AppendBackward(program_desc, target, no_grad_vars); @@ -138,12 +138,12 @@ void BindProgramDesc(py::module &m) { } return retv; }) - .def("block", &ProgramDescBind::MutableBlock, + .def("block", &ProgramDesc::MutableBlock, py::return_value_policy::reference) - .def("num_blocks", &ProgramDescBind::Size) - .def("serialize_to_string", SerializeMessage) + .def("num_blocks", &ProgramDesc::Size) + .def("serialize_to_string", SerializeMessage) .def("parse_from_string", - [](ProgramDescBind &program_desc, const std::string &data) { + [](ProgramDesc &program_desc, const std::string &data) { proto::ProgramDesc *desc = program_desc.Proto(); PADDLE_ENFORCE(desc->ParseFromString(data), "Fail to parse ProgramDesc from string. This could " @@ -152,35 +152,34 @@ void BindProgramDesc(py::module &m) { } void BindBlockDesc(py::module &m) { - py::class_(m, "BlockDesc", "") - .def_property_readonly("id", &BlockDescBind::ID) - .def_property_readonly("parent", &BlockDescBind::Parent) - .def("append_op", &BlockDescBind::AppendOp, + py::class_(m, "BlockDesc", "") + .def_property_readonly("id", &BlockDesc::ID) + .def_property_readonly("parent", &BlockDesc::Parent) + .def("append_op", &BlockDesc::AppendOp, py::return_value_policy::reference) - .def("prepend_op", &BlockDescBind::PrependOp, + .def("prepend_op", &BlockDesc::PrependOp, py::return_value_policy::reference) .def("var", - [](BlockDescBind &self, py::bytes byte_name) { + [](BlockDesc &self, py::bytes byte_name) { std::string name = byte_name; return self.Var(name); }, py::return_value_policy::reference) .def("has_var", - [](BlockDescBind &self, py::bytes byte_name) { + [](BlockDesc &self, py::bytes byte_name) { std::string name = byte_name; return self.HasVar(name); }) .def("find_var", - [](BlockDescBind &self, py::bytes byte_name) { + [](BlockDesc &self, py::bytes byte_name) { std::string name = byte_name; return self.FindVar(name); }, py::return_value_policy::reference) - .def("all_vars", &BlockDescBind::AllVars, - py::return_value_policy::reference) - .def("op_size", &BlockDescBind::OpSize) - .def("op", &BlockDescBind::Op, py::return_value_policy::reference) - .def("serialize_to_string", SerializeMessage); + .def("all_vars", &BlockDesc::AllVars, py::return_value_policy::reference) + .def("op_size", &BlockDesc::OpSize) + .def("op", &BlockDesc::Op, py::return_value_policy::reference) + .def("serialize_to_string", SerializeMessage); } void BindVarDsec(py::module &m) { @@ -193,25 +192,25 @@ void BindVarDsec(py::module &m) { .value("FP32", proto::DataType::FP32) .value("FP64", proto::DataType::FP64); - py::class_ var_desc(m, "VarDesc", ""); + py::class_ var_desc(m, "VarDesc", ""); var_desc .def("name", - [](const VarDescBind &self) { + [](const VarDesc &self) { py::bytes name = self.Name(); return name; }, py::return_value_policy::reference) - .def("set_shape", &VarDescBind::SetShape) - .def("set_dtype", &VarDescBind::SetDataType) - .def("shape", &VarDescBind::Shape, py::return_value_policy::reference) - .def("dtype", &VarDescBind::GetDataType) - .def("lod_level", &VarDescBind::GetLodLevel) - .def("set_lod_level", &VarDescBind::SetLoDLevel) - .def("type", &VarDescBind::GetType) - .def("set_type", &VarDescBind::SetType) - .def("serialize_to_string", SerializeMessage) - .def("persistable", &VarDescBind::Persistable) - .def("set_persistable", &VarDescBind::SetPersistable); + .def("set_shape", &VarDesc::SetShape) + .def("set_dtype", &VarDesc::SetDataType) + .def("shape", &VarDesc::Shape, py::return_value_policy::reference) + .def("dtype", &VarDesc::GetDataType) + .def("lod_level", &VarDesc::GetLodLevel) + .def("set_lod_level", &VarDesc::SetLoDLevel) + .def("type", &VarDesc::GetType) + .def("set_type", &VarDesc::SetType) + .def("serialize_to_string", SerializeMessage) + .def("persistable", &VarDesc::Persistable) + .def("set_persistable", &VarDesc::SetPersistable); py::enum_(var_desc, "VarType", "") .value("LOD_TENSOR", proto::VarDesc::LOD_TENSOR) @@ -235,26 +234,26 @@ void BindOpDesc(py::module &m) { .value("BOOLS", proto::AttrType::BOOLEANS) .value("BLOCK", proto::AttrType::BLOCK); - py::class_ op_desc(m, "OpDesc", ""); - op_desc.def("type", &OpDescBind::Type) - .def("set_type", &OpDescBind::SetType) - .def("input", &OpDescBind::Input) - .def("input_names", &OpDescBind::InputNames) - .def("set_input", &OpDescBind::SetInput) - .def("output", &OpDescBind::Output) - .def("output_names", &OpDescBind::OutputNames) - .def("set_output", &OpDescBind::SetOutput) - .def("has_attr", &OpDescBind::HasAttr) - .def("attr_type", &OpDescBind::GetAttrType) - .def("attr_names", &OpDescBind::AttrNames) - .def("set_attr", &OpDescBind::SetAttr) - .def("attr", &OpDescBind::GetAttr) - .def("set_block_attr", &OpDescBind::SetBlockAttr) - .def("block_attr", &OpDescBind::GetBlockAttr) - .def("check_attrs", &OpDescBind::CheckAttrs) - .def("infer_shape", &OpDescBind::InferShape) - .def("infer_var_type", &OpDescBind::InferVarType) - .def("serialize_to_string", SerializeMessage); + py::class_ op_desc(m, "OpDesc", ""); + op_desc.def("type", &OpDesc::Type) + .def("set_type", &OpDesc::SetType) + .def("input", &OpDesc::Input) + .def("input_names", &OpDesc::InputNames) + .def("set_input", &OpDesc::SetInput) + .def("output", &OpDesc::Output) + .def("output_names", &OpDesc::OutputNames) + .def("set_output", &OpDesc::SetOutput) + .def("has_attr", &OpDesc::HasAttr) + .def("attr_type", &OpDesc::GetAttrType) + .def("attr_names", &OpDesc::AttrNames) + .def("set_attr", &OpDesc::SetAttr) + .def("attr", &OpDesc::GetAttr) + .def("set_block_attr", &OpDesc::SetBlockAttr) + .def("block_attr", &OpDesc::GetBlockAttr) + .def("check_attrs", &OpDesc::CheckAttrs) + .def("infer_shape", &OpDesc::InferShape) + .def("infer_var_type", &OpDesc::InferVarType) + .def("serialize_to_string", SerializeMessage); } } // namespace pybind diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 31f802d4d2489dda7fbe1d773abfc14433db3d45..2d7fe251416dce629dd0a2318aaa020ec9668d9b 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -266,36 +266,36 @@ All parameter, weight, gradient are variables in Paddle. return ret_values; }); m.def("get_grad_op_descs", - [](const OpDescBind &op_desc, + [](const OpDesc &op_desc, const std::unordered_set &no_grad_set, std::unordered_map &grad_to_var, - const std::vector &grad_sub_block) { - std::vector> grad_op_descs = + const std::vector &grad_sub_block) { + std::vector> grad_op_descs = framework::OpInfoMap::Instance() .Get(op_desc.Type()) .GradOpMaker()(op_desc, no_grad_set, &grad_to_var, grad_sub_block); - std::vector grad_op_desc_ptrs(grad_op_descs.size()); + std::vector grad_op_desc_ptrs(grad_op_descs.size()); std::transform( grad_op_descs.begin(), grad_op_descs.end(), grad_op_desc_ptrs.begin(), - [](std::unique_ptr &p) { return p.release(); }); + [](std::unique_ptr &p) { return p.release(); }); return grad_op_desc_ptrs; }); - m.def("prune", [](const ProgramDescBind &origin, + m.def("prune", [](const ProgramDesc &origin, const std::vector> &targets) { - ProgramDescBind prog_with_targets(origin); + ProgramDesc prog_with_targets(origin); for (const auto &t : targets) { prog_with_targets.MutableBlock(t[0])->Op(t[1])->MarkAsTarget(); } proto::ProgramDesc pruned_desc; Prune(*prog_with_targets.Proto(), &pruned_desc); - return new ProgramDescBind(pruned_desc); + return new ProgramDesc(pruned_desc); }); - m.def("inference_optimize", [](ProgramDescBind &origin) { + m.def("inference_optimize", [](ProgramDesc &origin) { proto::ProgramDesc pruned_desc; InferenceOptimize(*(origin.Proto()), &pruned_desc); - return new ProgramDescBind(pruned_desc); + return new ProgramDesc(pruned_desc); }); m.def_submodule( "var_names", diff --git a/paddle/pybind/tensor_py.h b/paddle/pybind/tensor_py.h index 41fa658502d341fe9653a3e99b58498fcaeada47..268a0f2fa386adf99f7ea1589ff1f301f943a68b 100644 --- a/paddle/pybind/tensor_py.h +++ b/paddle/pybind/tensor_py.h @@ -14,6 +14,7 @@ #pragma once #include +#include "paddle/framework/executor.h" #include "paddle/framework/tensor.h" #include "paddle/memory/memcpy.h" #include "pybind11/numpy.h" @@ -61,11 +62,15 @@ struct CastToPyBufferImpl { auto *src_ptr = static_cast(tensor.data()); auto *dst_ptr = static_cast(dst_tensor.mutable_data( tensor.dims(), platform::CPUPlace())); - // TODO(qijun): Here we use default CUDA stream to set GPU Tensor to - // a Python numpy array. It's better to manage CDUA stream unifiedly. - paddle::platform::GpuMemcpySync(dst_ptr, src_ptr, - sizeof(CUR_TYPE) * tensor.numel(), - cudaMemcpyDeviceToHost); + + framework::DeviceContextPool &pool = + framework::DeviceContextPool::Get(); + auto dev_ctx = static_cast( + pool.Borrow(tensor.place())); + + paddle::platform::GpuMemcpyAsync( + dst_ptr, src_ptr, sizeof(CUR_TYPE) * tensor.numel(), + cudaMemcpyDeviceToHost, dev_ctx->stream()); #else PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); #endif @@ -132,10 +137,12 @@ void PyCUDATensorSetFromArray( self.Resize(framework::make_ddim(dims)); auto *dst = self.mutable_data(place); - // TODO(qijun): Here we use default CUDA stream to set a Python numpy - // array to a GPU Tensor. It's better to manage CDUA stream unifiedly. - paddle::platform::GpuMemcpySync(dst, array.data(), sizeof(T) * array.size(), - cudaMemcpyHostToDevice); + + framework::DeviceContextPool &pool = framework::DeviceContextPool::Get(); + auto dev_ctx = + static_cast(pool.Borrow(place)); + paddle::platform::GpuMemcpyAsync(dst, array.data(), sizeof(T) * array.size(), + cudaMemcpyHostToDevice, dev_ctx->stream()); } #endif diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py index f49cabfee87242c5ae352a5445a586301c9d9b47..22a37c22c3fc777cadcdee6632bbf1fb558fef70 100644 --- a/python/paddle/v2/fluid/layers/control_flow.py +++ b/python/paddle/v2/fluid/layers/control_flow.py @@ -441,9 +441,25 @@ def topk(input, k): def lod_tensor_to_array(x, table): - """ - This function creates an operator to convert an LOD_Tensor to - an array. + """This function performs the operation that converts an LOD_Tensor to + an array. + + Args: + x (Variable|list): The tensor that needs to be converted to an array. + table (ParamAttr|list): The variable that stores the level of lod + which is ordered by sequence length in + descending order. + + Returns: + Variable: The variable of type array that has been converted from a + tensor. + + Examples: + .. code-block:: python + + x = fluid.layers.data(name='x', shape=[10]) + table = fluid.layers.lod_rank_table(x, level=0) + array = fluid.layers.lod_tensor_to_array(x, table) """ helper = LayerHelper("lod_tensor_to_array", **locals()) array = helper.create_variable( @@ -459,9 +475,26 @@ def lod_tensor_to_array(x, table): def array_to_lod_tensor(x, table): - """ - This function creates an operator to convert an array to a - LOD_Tensor. + """This function performs the operations that converts an array to + an LOD_Tensor. + + Args: + x (Variable|list): The array that needs to be converted to a tensor. + table (ParamAttr|list): The variable that stores the level of lod + which is ordered by sequence length in + descending order. + + Returns: + Variable: The variable of type tensor that has been converted + from an array. + + Examples: + .. code-block:: python + + x = fluid.layers.data(name='x', shape=[10]) + table = fluid.layers.lod_rank_table(x, level=0) + array = fluid.layers.lod_tensor_to_array(x, table) + lod_tensor = fluid.layers.array_to_lod_tensor(array, table) """ helper = LayerHelper("array_to_lod_tensor", **locals()) tmp = helper.create_tmp_variable(dtype=x.dtype) @@ -474,10 +507,24 @@ def array_to_lod_tensor(x, table): def increment(x, value=1.0, in_place=True): - """ - This function creates an operator to increment each value in the input - `x` by an amount: `value` as mentioned in the input parameter. This - operation is performed in-place by default. + """This function performs an operation that increments each value in the + input :math:`x` by an amount: :math:`value` as mentioned in the input + parameter. This operation is performed in-place by default. + + Args: + x (Variable|list): The tensor that has the input values. + value (float): The amount by which the values should be incremented. + in_place (bool): If the increment should be performed in-place. + + Returns: + Variable: The tensor variable storing the transformation of + element-wise increment of each value in the input. + + Examples: + .. code-block:: python + + data = fluid.layers.data(name='data', shape=[32, 32], dtype='float32') + data = fluid.layers.increment(x=data, value=3.0, in_place=True) """ helper = LayerHelper("increment", **locals()) if not in_place: @@ -493,9 +540,24 @@ def increment(x, value=1.0, in_place=True): def array_write(x, i, array=None): - """ - This function creates an operator to write the data out as a + """This function performs the operation to write the data out as an LOD_TENSOR_ARRAY. + + Args: + x (Variable|list): The input tensor from which the data will be read. + i (Variable|list): The subscript index in tensor array, that points the + place from which data will be read. + array (Variable|list): The data can be read into this variable if + this is assigned. + Returns: + Variable: The tensor type variable that has the data written to it. + + Examples: + .. code-block::python + + tmp = fluid.layers.zeros(shape=[10], dtype='int32') + i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) + arr = layers.array_write(tmp, i=i) """ helper = LayerHelper('array_write', **locals()) if array is None: @@ -512,6 +574,21 @@ def array_write(x, i, array=None): def create_array(dtype): + """This function creates an array of type :math:`LOD_TENSOR_ARRAY` using the + LayerHelper. + + Args: + dtype (int|float): The data type of the elements in the array. + + Returns: + Variable: The tensor variable storing the elements of data type. + + Examples: + .. code-block:: python + + data = fluid.layers.create_array(dtype='float32') + + """ helper = LayerHelper("array", **locals()) return helper.create_variable( name="{0}.out".format(helper.name), @@ -550,9 +627,19 @@ def less_than(x, y, cond=None, **ignored): def array_read(array, i): - """ - This function creates an operator to read the data in as a + """This function performs the operation to read the data in as an LOD_TENSOR_ARRAY. + Args: + array (Variable|list): The input tensor that will be written to an array. + i (Variable|list): The subscript index in tensor array, that points the + place where data will be written to. + Returns: + Variable: The tensor type variable that has the data written to it. + Examples: + .. code-block::python + tmp = fluid.layers.zeros(shape=[10], dtype='int32') + i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) + arr = layers.array_read(tmp, i=i) """ helper = LayerHelper('array_read', **locals()) if not isinstance( @@ -586,9 +673,23 @@ def shrink_memory(x, i, table): def array_length(array): - """ - This function creates an operator to find the length of the + """This function performs the operation to find the length of the input LOD_TENSOR_ARRAY. + + Args: + array (LOD_TENSOR_ARRAY): The input array that will be used + to compute the length. + + Returns: + Variable: The length of the input LoDTensorArray. + + Examples: + .. code-block::python + + tmp = fluid.layers.zeros(shape=[10], dtype='int32') + i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) + arr = fluid.layers.array_write(tmp, i=i) + arr_len = fluid.layers.array_length(arr) """ helper = LayerHelper('array_length', **locals()) tmp = helper.create_tmp_variable(dtype='int64') diff --git a/python/paddle/v2/fluid/layers/io.py b/python/paddle/v2/fluid/layers/io.py index f4c5907f48b46ee5d9bcaba48370e5baf036c615..56c3f7b7b7f174338bb56bc5785423ca634650a6 100644 --- a/python/paddle/v2/fluid/layers/io.py +++ b/python/paddle/v2/fluid/layers/io.py @@ -12,20 +12,9 @@ def data(name, type=core.VarDesc.VarType.LOD_TENSOR, stop_gradient=True): """ - Data Layer. + **Data Layer** - Args: - name: The name/alias of the function - shape: Tuple declaring the shape. - append_batch_size: Whether or not to append the data as a batch. - dtype: The type of data : float32, float_16, int etc - type: The output type. By default it is LOD_TENSOR. - lod_level(int): The LoD Level. 0 means the input data is not a sequence. - main_program: Name of the main program that calls this - startup_program: Name of the startup program - stop_gradient: A boolean that mentions whether gradient should flow. - - This function takes in input and based on whether data has + This function takes in the input and based on whether data has to be returned back as a minibatch, it creates the global variable using the helper functions. The global variables can be accessed by all the following operations and layers in the graph. @@ -33,6 +22,24 @@ def data(name, All the input variables of this function are passed in as local variables to the LayerHelper constructor. + Args: + name(str): The name/alias of the function + shape(list): Tuple declaring the shape. + append_batch_size(bool): Whether or not to append the data as a batch. + dtype(int|float): The type of data : float32, float_16, int etc + type(VarType): The output type. By default it is LOD_TENSOR. + lod_level(int): The LoD Level. 0 means the input data is not a sequence. + main_program(Program): Name of the main program that calls this + startup_program(Program): Name of the startup program + stop_gradient(bool): A boolean that mentions whether gradient should flow. + + Returns: + Variable: The global variable that gives access to the data. + + Examples: + .. code-block:: python + + data = fluid.layers.data(name='x', shape=[784], dtype='float32') """ helper = LayerHelper('data', **locals()) shape = list(shape) diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index 8d819de603a81de6934dd4b03a6fc49f4319a2d3..a5bbf4f2bf6bcc377dd833713e1e12f6a54ed81b 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -27,48 +27,81 @@ def fc(input, """ **Fully Connected Layer** - This layer accepts multiple inputs and applies a linear transformation to each input. - If activation type is provided, the corresponding activation function is applied to the - output of the linear transformation. For each input :math:`X`, the equation is: + The fully connected layer can take multiple tensors as its inputs. It + creates a variable (one for each input tensor) called weights for each input + tensor, which represents a fully connected weight matrix from each input + unit to each output unit. The fully connected layer multiplies each input + tensor with its coresponding weight to produce an output Tensor. If + multiple input tensors are given, the results of multiple multiplications + will be sumed up. If bias_attr is not None, a biases variable will be + created and added to the output. Finally, if activation is not None, + it will be applied to the output as well. + + This process can be formulated as follows: .. math:: - Out = Act(WX + b) + Out = Act\left({\sum_{i=0}^{N-1}W_iX_i + b}\right) In the above equation: - * :math:`X`: Input value, a tensor with rank at least 2. - * :math:`W`: Weight, a 2-D tensor with shape [M, N]. - * :math:`b`: Bias, a 2-D tensor with shape [M, 1]. - * :math:`Act`: Activation function. - * :math:`Out`: Output value, same shape with :math:`X`. - - All the input variables are passed in as local variables to the LayerHelper - constructor. + * :math:`N`: Number of the input. + * :math:`X_i`: The input tensor. + * :math:`W`: The weights created by this layer. + * :math:`b`: The bias parameter created by this layer (if needed). + * :math:`Act`: The activation funtion. + * :math:`Out`: The output tensor. Args: - input(Variable|list): Input tensors. Each tensor has a rank of atleast 2 - size(int): Output size - num_flatten_dims(int): Number of columns in input - param_attr(ParamAttr|list): The parameters/weights to the FC Layer - bias_attr(ParamAttr|list): Bias parameter for the FC layer - act(str): Activation type - name(str): Name/alias of the function + input(Variable|list): The input tensor(s) to the fully connected layer. + size(int): The number of output units in the fully connected layer. + num_flatten_dims(int): The fc layer can accept an input tensor with more + than two dimensions. If this happens, the + multidimensional tensor will first be flattened + into a 2-dimensional matrix. The parameter + `num_flatten_dims` determines how the input tensor + is flattened: the first `num_flatten_dims` + dimensions will be flatten to form the first + dimension of the final matrix (height of the + matrix), and the rest `rank(X) - num_col_dims` + dimensions are flattened to form the second + dimension of the final matrix (width of the matrix). + For example, suppose `X` is a 6-dimensional tensor + with a shape [2, 3, 4, 5, 6], and + `x_num_col_dims` = 3. Then, the flattened matrix + will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. + By default, `x_num_col_dims` is set to 1. + param_attr(ParamAttr|list): The parameter attribute for learnable + parameters/weights of the fully connected + layer. + param_initializer(ParamAttr|list): The initializer used for the + weight/parameter. If set None, + XavierInitializer() will be used. + bias_attr(ParamAttr|list): The parameter attribute for the bias parameter + for this layer. If set None, no bias will be + added to the output units. + bias_initializer(ParamAttr|list): The initializer used for the bias. + If set None, then ConstantInitializer() + will be used. + act(str): Activation to be applied to the output of the fully connected + layer. + name(str): Name/alias of the fully connected layer. + Returns: - Variable: The tensor variable storing the transformation and \ - non-linearity activation result. + Variable: The output tensor variable. Raises: - ValueError: If rank of input tensor is less than 2. + ValueError: If rank of the input tensor is less than 2. Examples: .. code-block:: python - data = fluid.layers.data(name='data', shape=[32, 32], dtype='float32') + data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32") fc = fluid.layers.fc(input=data, size=1000, act="tanh") """ - helper = LayerHelper('fc', **locals()) + + helper = LayerHelper("fc", **locals()) dtype = helper.input_dtype() @@ -88,8 +121,8 @@ def fc(input, "Y": w, }, outputs={"Out": tmp}, - attrs={'x_num_col_dims': num_flatten_dims, - 'y_num_col_dims': 1}) + attrs={"x_num_col_dims": num_flatten_dims, + "y_num_col_dims": 1}) mul_results.append(tmp) # sum @@ -117,7 +150,7 @@ def embedding(input, size, is_sparse=False, param_attr=None, dtype='float32'): Args: input(Variable): Input to the function - size(int): Output size + size(tuple|list|None): Shape of the look up table parameter is_sparse(bool): Boolean flag that specifying whether the input is sparse param_attr(ParamAttr): Parameters for this layer dtype(np.dtype|core.DataType|str): The type of data : float32, float_16, int etc @@ -704,6 +737,7 @@ def conv2d_transpose(input, filter_size=None, padding=None, stride=None, + dilation=None, param_attr=None): """ The transpose of conv2d layer. @@ -727,6 +761,9 @@ def conv2d_transpose(input, stride(int|tuple): The stride size. If stride is a tuple, it must contain two integers, (stride_H, stride_W). Otherwise, the stride_H = stride_W = stride. + dilation(int|tuple): The dilation size. If dilation is a tuple, it must + contain two integers, (dilation_H, dilation_W). Otherwise, the + dilation_H = dilation_W = dilation. param_attr: Parameter Attribute. main_program(Program): the main program startup_program(Program): the startup program @@ -747,10 +784,15 @@ def conv2d_transpose(input, op_attr['paddings'] = padding if isinstance(stride, int): - op_attr['strides'] = stride + op_attr['strides'] = [stride, stride] elif stride is not None: op_attr['strides'] = stride + if isinstance(dilation, int): + op_attr['dilations'] = [dilation, dilation] + elif dilation is not None: + op_attr['dilations'] = dilation + if filter_size is None: if output_size is None: raise ValueError("output_size must be set when filter_size is None") @@ -759,14 +801,17 @@ def conv2d_transpose(input, padding = op_attr.get('paddings', [0, 0]) stride = op_attr.get('strides', [1, 1]) + dilation = op_attr.get('dilations', [1, 1]) h_in = input.shape[2] w_in = input.shape[3] - filter_size_h = output_size[0] - \ - (h_in - 1) * stride[0] + 2 * padding[0] - filter_size_w = output_size[1] - \ - (w_in - 1) * stride[1] + 2 * padding[1] + + filter_size_h = (output_size[0] - (h_in - 1) * stride[0] + 2 * + padding[0] - 1) / dilation[0] + 1 + filter_size_w = (output_size[1] - (w_in - 1) * stride[1] + 2 * + padding[1] - 1) / dilation[1] + 1 filter_size = [filter_size_h, filter_size_w] + elif isinstance(filter_size, int): filter_size = [filter_size, filter_size] diff --git a/python/paddle/v2/fluid/layers/tensor.py b/python/paddle/v2/fluid/layers/tensor.py index e984a6be19f43afca41482f3cf4c52df4409ab92..e5820d24cd2b34ef53cbb91e2be66efc1b74d315 100644 --- a/python/paddle/v2/fluid/layers/tensor.py +++ b/python/paddle/v2/fluid/layers/tensor.py @@ -27,10 +27,23 @@ def cast(x, dtype): return out -def concat(input, axis): +def concat(input, axis=0): """ - This function concats the input along the axis mentioned + **Concat** + + This function concatenates the input along the axis mentioned and returns that as the output. + + Args: + input(list): List of tensors to be concatenated + axis(int): Integer axis along which the tensors will be concatenated + + Returns: + Variable: Output variable of the concatenation + + Examples: + .. code-block:: python + out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth]) """ helper = LayerHelper('concat', **locals()) out = helper.create_tmp_variable(dtype=helper.input_dtype()) @@ -43,9 +56,28 @@ def concat(input, axis): def sums(input, out=None): - """ - This function takes in the input and performs the sum operation on it - and returns that as the output. + """This function performs the sum operation on the input and returns the + result as the output. + + Args: + input (Variable|list): The input tensor that has the elements + that need to be summed up. + + Returns: + Variable: The tensor type variable that has the sum of input + written to it. + + Examples: + .. code-block::python + + tmp = fluid.layers.zeros(shape=[10], dtype='int32') + i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) + a0 = layers.array_read(array=tmp, i=i) + i = layers.increment(x=i) + a1 = layers.array_read(array=tmp, i=i) + mean_a0 = layers.mean(x=a0) + mean_a1 = layers.mean(x=a1) + a_sum = layers.sums(input=[mean_a0, mean_a1]) """ helper = LayerHelper('sum', **locals()) if out is None: @@ -55,6 +87,24 @@ def sums(input, out=None): def assign(input, output): + """ + **Assign** + + This function copies the *input* Variable to the *output* Variable. + + Args: + input(Variable): The source variable + output(Variable): The destination variable + + Returns: + Variable: The destination variable that was supplied as the *output*. + + Examples: + .. code-block:: python + out = fluid.layers.create_tensor(dtype='float32') + hidden = fluid.layers.fc(input=data, size=10) + fluid.layers.assign(hidden, out) + """ helper = LayerHelper('assign', **locals()) helper.append_op( type='scale', @@ -68,9 +118,9 @@ def fill_constant(shape, dtype, value, out=None): """ **fill_constant** - This function creates a tensor of specified *shape* and + This function creates a tensor of specified *shape* and *dtype*, and initializes this with a constant supplied in *value*. - + It also sets *stop_gradient* to True. Args: @@ -110,9 +160,9 @@ def fill_constant_batch_size_like(input, """ **fill_constant_batch_size_like** - This function creates a tensor of specified *shape*, *dtype* and batch size, - and initializes this with a constant supplied in *value*. The batch size is - obtained from the `input` tensor. + This function creates a tensor of specified *shape*, *dtype* and batch size, + and initializes this with a constant supplied in *value*. The batch size is + obtained from the `input` tensor. It also sets *stop_gradient* to True. diff --git a/python/paddle/v2/fluid/param_attr.py b/python/paddle/v2/fluid/param_attr.py index f6f320c788e7e08d44df8aff5ad3792b237e103a..ab4561b0423dd73c8c0d529cbf34b52876b1b77c 100644 --- a/python/paddle/v2/fluid/param_attr.py +++ b/python/paddle/v2/fluid/param_attr.py @@ -58,7 +58,9 @@ class ParamAttr(object): def to_kwargs(self, with_initializer=False): kwargs = { 'name': self.name, - 'learning_rate': self.learning_rate, + 'optimize_attr': { + 'learning_rate': self.learning_rate + }, 'regularizer': self.regularizer, 'trainable': self.trainable, 'clip_attr': self.clip diff --git a/python/paddle/v2/fluid/tests/test_batch_norm_op.py b/python/paddle/v2/fluid/tests/test_batch_norm_op.py index dee2febb83d171ed4a13921e3b7d37322ead2786..ec71d391e61a45d2be52b57aa6a8f733ca0fc4c3 100644 --- a/python/paddle/v2/fluid/tests/test_batch_norm_op.py +++ b/python/paddle/v2/fluid/tests/test_batch_norm_op.py @@ -341,6 +341,10 @@ class TestBatchNormOp(OpTest): places = [core.CPUPlace()] if core.is_compile_gpu() and core.op_support_gpu("batch_norm"): places.append(core.GPUPlace(0)) + + core.init_devices(["CPU", "GPU:0"]) + else: + core.init_devices(["CPU"]) for place in places: for data_format in ["NCHW", "NHWC"]: test_with_place(place, data_format, [2, 3, 4, 5]) diff --git a/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py b/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py index d7b1f2f2a3abf6335998742dbbef8e17794170fa..d59537b924d57d40f7d740d99eb814c95f528e5f 100644 --- a/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py +++ b/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py @@ -3,14 +3,17 @@ import numpy as np from op_test import OpTest -def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param): +def conv2dtranspose_forward_naive(input_, filter_, attrs): in_n, in_c, in_h, in_w = input_.shape f_c, out_c, f_h, f_w = filter_.shape assert in_c == f_c - stride, pad = conv2dtranspose_param['stride'], conv2dtranspose_param['pad'] - out_h = (in_h - 1) * stride[0] + f_h - out_w = (in_w - 1) * stride[1] + f_w + stride, pad, dilations = attrs['strides'], attrs['paddings'], attrs[ + 'dilations'] + d_bolck_h = dilations[0] * (f_h - 1) + 1 + d_bolck_w = dilations[1] * (f_w - 1) + 1 + out_h = (in_h - 1) * stride[0] + d_bolck_h + out_w = (in_w - 1) * stride[1] + d_bolck_w out = np.zeros((in_n, out_c, out_h, out_w)) @@ -23,9 +26,9 @@ def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param): for k in range(out_c): tmp_out = np.sum(input_masked * filter_[:, k, :, :], axis=0) - i1, i2 = i * stride[0], i * stride[0] + f_h - j1, j2 = j * stride[0], j * stride[0] + f_w - out[n, k, i1:i2, j1:j2] += tmp_out + i1, i2 = i * stride[0], i * stride[0] + d_bolck_h + j1, j2 = j * stride[0], j * stride[0] + d_bolck_h + out[n, k, i1:i2:dilations[0], j1:j2:dilations[1]] += tmp_out out = out[:, :, pad[0]:out_h - pad[0], pad[1]:out_w - pad[1]] return out @@ -37,11 +40,8 @@ class TestConv2dTransposeOp(OpTest): self.init_op_type() self.init_test_case() - conv2dtranspose_param = {'stride': self.stride, 'pad': self.pad} input_ = np.random.random(self.input_size).astype("float32") filter_ = np.random.random(self.filter_size).astype("float32") - output = conv2dtranspose_forward_naive( - input_, filter_, conv2dtranspose_param).astype('float32') self.inputs = {'Input': input_, 'Filter': filter_} self.attrs = { @@ -49,6 +49,10 @@ class TestConv2dTransposeOp(OpTest): 'paddings': self.pad, 'dilations': self.dilations } + + output = conv2dtranspose_forward_naive(input_, filter_, + self.attrs).astype('float32') + self.outputs = {'Output': output} def test_check_output(self): @@ -104,11 +108,60 @@ class TestWithStride(TestConv2dTransposeOp): self.filter_size = [f_c, 6, 3, 3] +class TestWithDilation(TestConv2dTransposeOp): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [1, 1] + self.dilations = [2, 2] + self.input_size = [2, 3, 5, 5] # NCHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3] + + # ------------ test_cudnn ------------ class TestCudnn(TestConv2dTransposeOp): def init_op_type(self): self.op_type = "conv2d_transpose_cudnn" +class TestCudnnWithPad(TestWithPad): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [1, 1] + self.dilations = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3] + + def init_op_type(self): + self.op_type = "conv2d_transpose_cudnn" + + +class TestCudnnWithStride(TestWithStride): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [2, 2] + self.dilations = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3] + + def init_op_type(self): + self.op_type = "conv2d_transpose_cudnn" + + +# #cudnn v5 does not support dilation conv. +# class TestCudnnWithDilation(TestWithDilation): +# def init_test_case(self): +# self.pad = [1, 1] +# self.stride = [2, 2] +# self.dilations = [2, 2] +# self.input_size = [2, 3, 5, 5] # NCHW +# f_c = self.input_size[1] +# self.filter_size = [f_c, 6, 3, 3] +# +# def init_op_type(self): +# self.op_type = "conv2d_transpose_cudnn" + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py b/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py index 8fd34b87bfea91307f52fdcbb9f71f2e1a9c6c56..a353f9b4d40233de46237005138f21430f4d865a 100644 --- a/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py +++ b/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py @@ -3,15 +3,20 @@ import numpy as np from op_test import OpTest -def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param): +def conv3dtranspose_forward_naive(input_, filter_, attrs): in_n, in_c, in_d, in_h, in_w = input_.shape f_c, out_c, f_d, f_h, f_w = filter_.shape assert in_c == f_c - stride, pad = conv3dtranspose_param['stride'], conv3dtranspose_param['pad'] - out_d = (in_d - 1) * stride[0] + f_d - out_h = (in_h - 1) * stride[1] + f_h - out_w = (in_w - 1) * stride[2] + f_w + stride, pad, dilations = attrs['strides'], attrs['paddings'], attrs[ + 'dilations'] + + d_bolck_d = dilations[0] * (f_d - 1) + 1 + d_bolck_h = dilations[1] * (f_h - 1) + 1 + d_bolck_w = dilations[2] * (f_w - 1) + 1 + out_d = (in_d - 1) * stride[0] + d_bolck_d + out_h = (in_h - 1) * stride[1] + d_bolck_h + out_w = (in_w - 1) * stride[2] + d_bolck_w out = np.zeros((in_n, out_c, out_d, out_h, out_w)) for n in range(in_n): @@ -25,10 +30,11 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param): for k in range(out_c): tmp_out = np.sum(input_masked * filter_[:, k, :, :, :], axis=0) - d1, d2 = d * stride[0], d * stride[0] + f_d - i1, i2 = i * stride[1], i * stride[1] + f_h - j1, j2 = j * stride[2], j * stride[2] + f_w - out[n, k, d1:d2, i1:i2, j1:j2] += tmp_out + d1, d2 = d * stride[0], d * stride[0] + d_bolck_d + i1, i2 = i * stride[1], i * stride[1] + d_bolck_h + j1, j2 = j * stride[2], j * stride[2] + d_bolck_w + out[n, k, d1:d2:dilations[0], i1:i2:dilations[1], j1:j2: + dilations[2]] += tmp_out out = out[:, :, pad[0]:out_d - pad[0], pad[1]:out_h - pad[1], pad[2]:out_w - pad[2]] @@ -41,18 +47,19 @@ class TestConv3dTransposeOp(OpTest): self.init_op_type() self.init_test_case() - conv3dtranspose_param = {'stride': self.stride, 'pad': self.pad} input_ = np.random.random(self.input_size).astype("float32") filter_ = np.random.random(self.filter_size).astype("float32") - output = conv3dtranspose_forward_naive( - input_, filter_, conv3dtranspose_param).astype("float32") self.inputs = {'Input': input_, 'Filter': filter_} self.attrs = { 'strides': self.stride, 'paddings': self.pad, - # 'dilations': self.dilations + 'dilations': self.dilations } + + output = conv3dtranspose_forward_naive(input_, filter_, + self.attrs).astype("float32") + self.outputs = {'Output': output} def test_check_output(self): @@ -108,11 +115,60 @@ class TestWithStride(TestConv3dTransposeOp): self.filter_size = [f_c, 6, 3, 3, 3] +class TestWithDilation(TestConv3dTransposeOp): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [2, 2, 2] + self.input_size = [2, 3, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 3] + + # ------------ test_cudnn ------------ class TestCudnn(TestConv3dTransposeOp): def init_op_type(self): self.op_type = "conv3d_transpose_cudnn" +class TestCudnnWithPad(TestWithPad): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.input_size = [2, 3, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 3] + + def init_op_type(self): + self.op_type = "conv3d_transpose_cudnn" + + +class TestCudnnWithStride(TestWithStride): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [2, 2, 2] + self.dilations = [1, 1, 1] + self.input_size = [2, 3, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 3] + + def init_op_type(self): + self.op_type = "conv3d_transpose_cudnn" + + +# #cudnn v5 does not support dilation conv. +# class TestCudnnWithDilation(TestWithDilation): +# def init_test_case(self): +# self.pad = [1, 1, 1] +# self.stride = [2, 2, 2] +# self.dilations = [2, 2, 2] +# self.input_size = [2, 3, 5, 5, 5] # NCDHW +# f_c = self.input_size[1] +# self.filter_size = [f_c, 6, 3, 3, 3] +# +# def init_op_type(self): +# self.op_type = "conv3d_transpose_cudnn" + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_fill_zeros_like_op.py b/python/paddle/v2/fluid/tests/test_fill_zeros_like_op.py index eff8fa87d9c0dafc6935604101e94ee6c8b081ce..cd91769a22f8d6af193efabd8d997913676fbba6 100644 --- a/python/paddle/v2/fluid/tests/test_fill_zeros_like_op.py +++ b/python/paddle/v2/fluid/tests/test_fill_zeros_like_op.py @@ -7,7 +7,7 @@ class TestFillZerosLikeOp(OpTest): def setUp(self): self.op_type = "fill_zeros_like" self.inputs = {'X': np.random.random((219, 232)).astype("float32")} - self.outputs = {'Y': np.zeros_like(self.inputs["X"])} + self.outputs = {'Out': np.zeros_like(self.inputs["X"])} def test_check_output(self): self.check_output() diff --git a/python/paddle/v2/fluid/tests/test_gaussian_random_op.py b/python/paddle/v2/fluid/tests/test_gaussian_random_op.py index 627ab4e23562f14538d85f2e21edeb7d72d940bb..a9d943b8b7f7d9bc0dec89c5360769e0328527ba 100644 --- a/python/paddle/v2/fluid/tests/test_gaussian_random_op.py +++ b/python/paddle/v2/fluid/tests/test_gaussian_random_op.py @@ -1,32 +1,47 @@ import unittest +import numpy + +import paddle.v2.fluid as fluid import paddle.v2.fluid.core as core from paddle.v2.fluid.op import Operator -import numpy +from paddle.v2.fluid.executor import Executor class TestGaussianRandomOp(unittest.TestCase): + def setUp(self): + self.op_type = "gaussian_random" + self.inputs = {} + self.attrs = {"shape": [1000, 784], "mean": .0, "std": 1., "seed": 10} + + self.outputs = ["Out"] + def test_cpu(self): - self.gaussian_random_test(place=core.CPUPlace()) + self.gaussian_random_test(place=fluid.CPUPlace()) def test_gpu(self): if core.is_compile_gpu(): - self.gaussian_random_test(place=core.GPUPlace(0)) + self.gaussian_random_test(place=fluid.GPUPlace(0)) def gaussian_random_test(self, place): - scope = core.Scope() - scope.var('Out').get_tensor() - - op = Operator( - "gaussian_random", - Out='Out', - shape=[1000, 784], - mean=.0, - std=1., - seed=10) context = core.DeviceContext.create(place) - op.run(scope, context) - tensor = numpy.array(scope.find_var('Out').get_tensor()) + program = fluid.Program() + block = program.global_block() + vout = block.create_var(name="Out") + op = block.append_op( + type=self.op_type, outputs={"Out": vout}, attrs=self.attrs) + + op.desc.infer_var_type(block.desc) + op.desc.infer_shape(block.desc) + + fetch_list = [] + for var_name in self.outputs: + fetch_list.append(block.var(var_name)) + + exe = Executor(place) + outs = exe.run(program, fetch_list=fetch_list) + tensor = outs[0] + self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1) self.assertAlmostEqual(numpy.std(tensor), 1., delta=0.1) diff --git a/python/paddle/v2/fluid/tests/test_uniform_random_op.py b/python/paddle/v2/fluid/tests/test_uniform_random_op.py index f736dfb2e85552b321403c961da517f3b3efb100..00b4f196209a6414f1063a33c0e31093e33ca39d 100644 --- a/python/paddle/v2/fluid/tests/test_uniform_random_op.py +++ b/python/paddle/v2/fluid/tests/test_uniform_random_op.py @@ -1,32 +1,50 @@ import unittest +import numpy + from paddle.v2.fluid.op import Operator import paddle.v2.fluid.core as core -import numpy +import paddle.v2.fluid as fluid class TestUniformRandomOp(unittest.TestCase): - def test_uniform_random_cpu(self): + def setUp(self): + self.op_type = "uniform_random" + self.inputs = {} + self.attrs = { + "shape": [1000, 784], + "min": -5.0, + "max": 10.0, + "seed": 10 + } + self.outputs = ["Out"] + + def test_cpu(self): self.uniform_random_test(place=core.CPUPlace()) - def test_uniform_random_gpu(self): + def test_gpu(self): if core.is_compile_gpu(): self.uniform_random_test(place=core.GPUPlace(0)) def uniform_random_test(self, place): - scope = core.Scope() - scope.var('X').get_tensor() - - op = Operator( - "uniform_random", - Out='X', - shape=[1000, 784], - min=-5.0, - max=10.0, - seed=10) - - ctx = core.DeviceContext.create(place) - op.run(scope, ctx) - tensor = numpy.array(scope.find_var('X').get_tensor()) + context = core.DeviceContext.create(place) + program = fluid.Program() + block = program.global_block() + vout = block.create_var(name="Out") + op = block.append_op( + type=self.op_type, outputs={"Out": vout}, attrs=self.attrs) + + op.desc.infer_var_type(block.desc) + op.desc.infer_shape(block.desc) + + fetch_list = [] + for var_name in self.outputs: + fetch_list.append(block.var(var_name)) + + exe = fluid.Executor(place) + outs = exe.run(program, fetch_list=fetch_list) + + tensor = outs[0] + self.assertAlmostEqual(tensor.mean(), 2.5, delta=0.1)