diff --git a/CMakeLists.txt b/CMakeLists.txt index b309ff37e52b4fd28b14925bdd7e3740e1e2fa47..00996cb7ed5cc573c42b69be6db369c3654d6d1a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,14 +16,14 @@ cmake_minimum_required(VERSION 3.0) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) -SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") -SET(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") include(system) project(paddle CXX C Go) -message(STATUS "CXX compiler: " ${CMAKE_CXX_COMPILER} ", version: " ${CMAKE_CXX_COMPILER_VERSION}) -message(STATUS "C compiler: " ${CMAKE_C_COMPILER} ", version: " ${CMAKE_C_COMPILER_VERSION}) +message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: " + "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") +message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: " + "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}") find_package(Sphinx) if(NOT CMAKE_CROSSCOMPILING) @@ -201,6 +201,10 @@ if(WITH_GOLANG) endif(WITH_GOLANG) set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") + +SET(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") +SET(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") + add_subdirectory(paddle) if(WITH_PYTHON) add_subdirectory(python) diff --git a/benchmark/IntelOptimizedPaddle.md b/benchmark/IntelOptimizedPaddle.md index 8ee7fd28c58f2a2bcb82040eb824a37062bd4e9c..084d3237d9cfe9ca4837f77cf5f70a2449cfcc03 100644 --- a/benchmark/IntelOptimizedPaddle.md +++ b/benchmark/IntelOptimizedPaddle.md @@ -22,6 +22,7 @@ On each machine, we will test and compare the performance of training on single #### Training Test on batch size 64, 128, 256 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz +Pay attetion that the speed below includes forward, backward and parameter update time. So we can not directly compare the data with the benchmark of caffe `time` [command](https://github.com/PaddlePaddle/Paddle/blob/develop/benchmark/caffe/image/run.sh#L9), which only contain forward and backward. The updating time of parameter would become very heavy when the weight size are large, especially on alexnet. Input image size - 3 * 224 * 224, Time: images/second @@ -55,6 +56,16 @@ Input image size - 3 * 224 * 224, Time: images/second +- Alexnet + +| BatchSize | 64 | 128 | 256 | +|--------------|--------| ------ | -------| +| OpenBLAS | 2.13 | 2.45 | 2.68 | +| MKLML | 66.37 | 105.60 | 144.04 | +| MKL-DNN | 399.00 | 498.94 | 626.53 | + +chart TBD + #### Inference Test on batch size 1, 2, 4, 8, 16 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz - VGG-19 @@ -82,6 +93,15 @@ Test on batch size 1, 2, 4, 8, 16 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz | MKLML | 22.74 | 41.56 | 81.22 | 133.47 | 210.53 | | MKL-DNN | 175.10 | 272.92 | 450.70 | 512.00 | 600.94 | +- Alexnet + +| BatchSize | 1 | 2 | 4 | 8 | 16 | +|-----------|--------|--------|--------|--------|--------| +| OpenBLAS | | | | | | +| MKLML | 21.32 | 36.55 | 73.06 | 131.15 | 192.77 | +| MKL-DNN | 442.91 | 656.41 | 719.10 | 847.68 | 850.51 | + +chart TBD ### Laptop TBD diff --git a/benchmark/paddle/image/alexnet.py b/benchmark/paddle/image/alexnet.py index 77d130ae34059d1e87040d00346ac1dadd86b0d8..cad6051f1413a5bb95f87a940f3aa81e49e5d282 100644 --- a/benchmark/paddle/image/alexnet.py +++ b/benchmark/paddle/image/alexnet.py @@ -19,7 +19,11 @@ args = { 'num_samples': num_samples } define_py_data_sources2( - "train.list", None, module="provider", obj="process", args=args) + "train.list" if not is_infer else None, + "test.list" if is_infer else None, + module="provider", + obj="process", + args=args) settings( batch_size=batch_size, diff --git a/benchmark/paddle/image/run_openblas_infer.sh b/benchmark/paddle/image/run_openblas_infer.sh index da034f3b9dff794e22086a5295ad2b0c2361c356..71a49231a5527ebee9f45d5f4650ce2a4f6a1c31 100755 --- a/benchmark/paddle/image/run_openblas_infer.sh +++ b/benchmark/paddle/image/run_openblas_infer.sh @@ -8,15 +8,19 @@ function clock_to_seconds() { } function infer() { - unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY topology=$1 layer_num=$2 bs=$3 - thread=`nproc` - if [ $thread -gt $bs ]; then - thread=$bs + trainers=`nproc` + if [ $trainers -gt $bs ]; then + trainers=$bs fi - log="logs/infer-${topology}-${layer_num}-${thread}openblas-${bs}.log" + log="logs/infer-${topology}-${layer_num}-${trainers}openblas-${bs}.log" + threads=$((`nproc` / trainers)) + if [ $threads -eq 0 ]; then + threads=1 + fi + export OPENBLAS_NUM_THREADS=$threads models_in="models/${topology}-${layer_num}/pass-00000/" if [ ! -d $models_in ]; then @@ -28,7 +32,7 @@ function infer() { --config="${topology}.py" \ --use_mkldnn=False \ --use_gpu=False \ - --trainer_count=$thread \ + --trainer_count=$trainers \ --log_period=$log_period \ --config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True,num_samples=256" \ --init_model_path=$models_in \ diff --git a/benchmark/paddle/image/run_openblas_train.sh b/benchmark/paddle/image/run_openblas_train.sh index e9df83fee2a3f796b7234b39619364f6ee4d5dc9..935cff6f2c97d25d6de556cfee25e27dbe49b5b6 100755 --- a/benchmark/paddle/image/run_openblas_train.sh +++ b/benchmark/paddle/image/run_openblas_train.sh @@ -1,7 +1,7 @@ set -e function train() { - unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY + export OPENBLAS_NUM_THREADS=1 topology=$1 layer_num=$2 bs=$3 diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake index 96fc886a342cae38d5b804266d3af7bc909a4da2..c4712f19eb80b34ffbf713d2b13fc0c775312af1 100644 --- a/cmake/external/eigen.cmake +++ b/cmake/external/eigen.cmake @@ -19,7 +19,7 @@ ExternalProject_Add( if (${CMAKE_VERSION} VERSION_LESS "3.3.0") set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/eigen3_dummy.c) - file(WRITE ${dummyfile} "const char * dummy_eigen3 = \"${dummyfile}\";") + file(WRITE ${dummyfile} "const char *dummy_eigen3 = \"${dummyfile}\";") add_library(eigen3 STATIC ${dummyfile}) else() add_library(eigen3 INTERFACE) diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index 5d24caebdcc5a28823164d718fb1628be5c4179d..b67c559fdf7a2b1695589a041cfa8fb8a9580516 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -63,9 +63,17 @@ ExternalProject_Add( -DMKLROOT:PATH=${MKLML_ROOT} ) -ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL) -SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB}) -ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) +ADD_LIBRARY(shared_mkldnn SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB}) +ADD_DEPENDENCIES(shared_mkldnn ${MKLDNN_PROJECT}) MESSAGE(STATUS "MKLDNN library: ${MKLDNN_LIB}") add_definitions(-DPADDLE_WITH_MKLDNN) -LIST(APPEND external_project_dependencies mkldnn) +LIST(APPEND external_project_dependencies shared_mkldnn) + +# generate a static dummy target to track mkldnn dependencies +# for cc_library(xxx SRCS xxx.c DEPS mkldnn) +SET(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/mkldnn_dummy.c) +FILE(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") +ADD_LIBRARY(mkldnn STATIC ${dummyfile}) +TARGET_LINK_LIBRARIES(mkldnn ${MKLDNN_LIB} ${MKLML_LIB} ${MKLML_IOMP_LIB}) +ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 97857a686b38d935b19f510ecdcb66bcca91fe03..0e79c0cc7992060cbe3b668ec927936183389eb6 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -30,23 +30,21 @@ IF(NOT ${CBLAS_FOUND}) CACHE FILEPATH "openblas library." FORCE) SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable") + SET(OPENBLAS_COMMIT "v0.2.20") IF(CMAKE_CROSSCOMPILING) SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER}) GET_FILENAME_COMPONENT(CROSS_SUFFIX ${CMAKE_C_COMPILER} DIRECTORY) SET(CROSS_SUFFIX ${CROSS_SUFFIX}/) IF(ANDROID) - # arm_soft_fp_abi branch of OpenBLAS to support softfp - # https://github.com/xianyi/OpenBLAS/tree/arm_soft_fp_abi - SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") + # use softfp SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0) ELSEIF(ANDROID_ABI STREQUAL "arm64-v8a") SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0) ENDIF() ELSEIF(IOS) IF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") - SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}") SET(OPENBLAS_CC "${OPENBLAS_CC} -arch arm64") SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=${CROSS_SUFFIX}) @@ -56,14 +54,12 @@ IF(NOT ${CBLAS_FOUND}) ENDIF() ELSEIF(RPI) # use hardfp - SET(OPENBLAS_COMMIT "v0.2.20") SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 USE_THREAD=0) ENDIF() ELSE() IF(APPLE) SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -isysroot ${CMAKE_OSX_SYSROOT}") ENDIF() - SET(OPENBLAS_COMMIT "v0.2.20") SET(OPTIONAL_ARGS "") IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86(_64)?$") SET(OPTIONAL_ARGS DYNAMIC_ARCH=1 NUM_THREADS=64) @@ -113,7 +109,7 @@ INCLUDE_DIRECTORIES(${CBLAS_INC_DIR}) # FIXME(gangliao): generate cblas target to track all high performance # linear algebra libraries for cc_library(xxx SRCS xxx.c DEPS cblas) SET(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/cblas_dummy.c) -FILE(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") +FILE(WRITE ${dummyfile} "const char *dummy_cblas = \"${dummyfile}\";") ADD_LIBRARY(cblas STATIC ${dummyfile}) TARGET_LINK_LIBRARIES(cblas ${CBLAS_LIBRARIES}) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 66c8e3ad7ef7c80c1f388c25983425a0db5c0220..585db019d521b1699baadfae31ef95b5059c71b4 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -120,7 +120,7 @@ function(merge_static_libs TARGET_NAME) DEPENDS ${libs}) # Generate dummy staic lib - file(WRITE ${target_SRCS} "const char *dummy = \"${target_SRCS}\";") + file(WRITE ${target_SRCS} "const char *dummy_${TARGET_NAME} = \"${target_SRCS}\";") add_library(${TARGET_NAME} STATIC ${target_SRCS}) target_link_libraries(${TARGET_NAME} ${libs_deps}) @@ -160,7 +160,7 @@ function(merge_static_libs TARGET_NAME) DEPENDS ${libs} ${target_OBJS}) # Generate dummy staic lib - file(WRITE ${target_SRCS} "const char *dummy = \"${target_SRCS}\";") + file(WRITE ${target_SRCS} "const char *dummy_${TARGET_NAME} = \"${target_SRCS}\";") add_library(${TARGET_NAME} STATIC ${target_SRCS}) target_link_libraries(${TARGET_NAME} ${libs_deps}) @@ -324,7 +324,7 @@ function(go_library TARGET_NAME) ) # Add dummy code to support `make target_name` under Terminal Command - file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") + file(WRITE ${dummyfile} "const char *dummy_${TARGET_NAME} = \"${dummyfile}\";") if (go_library_SHARED OR go_library_shared) add_library(${TARGET_NAME} SHARED ${dummyfile}) else() diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index d81481ca819c13ee0e299c204f998f3915c34bd4..ddf0b055a92d80295b24255a5462d477e0d9c796 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -252,6 +252,11 @@ first_seq .. autoclass:: paddle.v2.layer.first_seq :noindex: +sub_seq +--------- +.. autoclass:: paddle.v2.layer.sub_seq + :noindex: + concat ------ .. autoclass:: paddle.v2.layer.concat diff --git a/doc/api/v2/fluid/layers.rst b/doc/api/v2/fluid/layers.rst index ef9febe0aa9d1f65e6608495f6ad7d4f502acf59..a7c8670f66cc7f319e41155211ead2d89126117f 100644 --- a/doc/api/v2/fluid/layers.rst +++ b/doc/api/v2/fluid/layers.rst @@ -68,12 +68,6 @@ scale :noindex: -reshape ---------- -.. autofunction:: paddle.v2.fluid.layers.reshape - :noindex: - - transpose --------- .. autofunction:: paddle.v2.fluid.layers.transpose @@ -313,6 +307,12 @@ sequence_expand :noindex: +gru_unit +-------- +.. autofunction:: paddle.v2.fluid.layers.gru_unit + :noindex: + + lstm_unit --------- .. autofunction:: paddle.v2.fluid.layers.lstm_unit @@ -332,7 +332,19 @@ reduce_sum reduce_mean ---------- +----------- .. autofunction:: paddle.v2.fluid.layers.reduce_mean :noindex: + +reduce_max +---------- +.. autofunction:: paddle.v2.fluid.layers.reduce_max + :noindex: + + +reduce_min +---------- +.. autofunction:: paddle.v2.fluid.layers.reduce_min + :noindex: + diff --git a/doc/design/backward.md b/doc/design/backward.md new file mode 100644 index 0000000000000000000000000000000000000000..20fda7a98f514a3f1c1c2d0ba7447ec954b21d5a --- /dev/null +++ b/doc/design/backward.md @@ -0,0 +1,158 @@ +# Backward Building + +## Motivation + +In Neural Network, most models are solved by the backpropagation algorithm(known as **BP**) at present. Technically, BP calculates the gradient of the loss function, then propagates it back through the networks following the chain rule. However, when configuring the model structure, users do not need to define the backward part. So a mechanism is required by the framework which can complete the model's backward part automatically according to the given forward part. + +When implementing a specific `op`, the developer is also asked to implement its backward version, called `grad_op`. A `grad_op` takes gradients of its corresponding `op`'s outputs, and calculate gradients of the `op`'s inputs. During the building of a model's backward part, the framework creates each forward `op`'s `grad_op`, and then string them together in reverse order of forwarding part. In this way, gradients spread from the end to the beginning of the model, in another word, from the loss to parameters. + +## Challenges + +The motivation of backward building is apparent. However, implementation it correctly is not so easy. In the **Fluid** design, a deep learning model is described by `Program`, `Block`, `Op` and `Variable`. The `Block` itself can be nested. It means that the `op`s and `variable`s are scattered across different blocks rather than all be gathered in a single graph. Our backward building algorithm shall visit blocks in recursive order and be able to insert `grad_op`s and new created `variable`s into the right place. + +## Usage + +Although the whole algorithm is comprised of many functions, only one is exposed as API: + +```python +def append_backward(loss, parameter_list=None, no_grad_set=None): + """ + Append backward part to main_program + + Args: + loss(Variable): The variable generated by the cost function. + parameter_list(list): Parameters that need to be updated by optimizers. + If None, it means all parameters need to be updated. + + no_grad_set(set): Variables that have no gradients in Block 0. + If None, the set will be generated inside the function and + contains all variables with `step_gradient=True` from all blocks. + + Return: + (list[Variable]): list of (parameters, gradients) pair. + """ +``` + +By invoking this API, the framework appends backward part of the program where the `loss` is. It takes three arguments. `loss` means the final loss value. It must be a scalar and is usually the output of the loss layer. It is also where the gradient generated and backpropagation starts. `parameter_list` marks all parameters needs updating. If it's `None`, all parameter will be updated by optimizers. `no_grad_set` marks variables without gradient. if all outputs of some `grad_op` are in `no_grad_set`, the `grad_op` will not be run. + +This API will be invoked automatically before optimizer building. +As a result, in most cases, users do not need to invoke the API by themselves to append backward part. + +## Implementation + +The implementation of backward building algorithm is in `backward.py` file. The whole algorithm can be divided into two independent parts: creating `grad_op`s and creating new variables. + +### Creating `grad_op`s + +The creating of `grad_op`s is implemented by: + +```python +def _append_backward_ops_(target, + block, + target_block, + no_grad_dict, + grad_to_var): + """ + Create all grad ops, and insert them into given block + + Args: + target(Variable): the target variable of forward pass + block(Block): the block where forward ops are + target_block(Block): the block which is going to hold new generated grad ops + no_grad_dict(dict): + key(int) block index + val(set) a set of varibale names. These varibales have no gradient + grad_to_var(dict)(output argument): + key(str): grad variable name + val(str): corresponding forward variable name + """ +``` + +Given a `block`, the function will traverses all `op`s in this block in reverse order, gets corresponding `grad_op` from the C++ core via `core.get_grad_op_desc()`, then append it to `target_block`. + +However, some specific `op`(e.g. `while_op`, `if_else_op`) can hold its own sub-block. For these sub-blocks contains `op`s as well, the `grad_op` creating should be recursive. + +During the reverse traversal, we check each `op` whether it has an attribute named `sub_block`. If so, it means there is a sub-block and we need to deal with it first. After creating a new block whose father is the one in `op`'s attribute, we invoke `_append_backward_ops_()` recursively, assigning the new block to parameter `target_block` and the one in `op`'s attribute to `block`. The *pseudo-code* shows this process: + +``` +******* pseudo-code ******** +for op in reversed(block.ops): + if op has an attribute named 'sub_block': + Get the sub-block(`s_block`) from op's attribute. + Create a new block(`grad_s_block`), whose father is `s_block`. + Invoke _append_backward_ops_(), with `block=s_block` and `target_block=grad_s_block` + + Invoke `core.get_grad_op_desc()` to get op's grad_op. + Insert name correspondings between variables and their gradients of the grad_op to grad_to_var + Assign grad_s_block to grad_op as it's 'sub_block' attribute. + Append grad_op to current target_block. +``` + +The first invoking of `_append_backward_ops_()` is initiated by `append_backward()`, in which parameters `block` and `target_block` are all assigned with root block(the block with index 0). + +### Corner Cases of `grad_op` Creating + +In the previous section, we show the regular process of `grad_op` creating. However, in some corner cases, the conventional algorithm is not enough to get the correct result and appending handling is required. These additional processes run after the algorithm mentioned above and do some special adjusts on its output `grad_op`s. + +#### Shared Variables + +If a variable is read by more than one `op` in the forward pass, its gradient is likely to be written by more than one `grad_op`s in the next backward pass. To make the gradient result being the sum of all `grad_op`s' outputs instead of the last running one, we assign each output with a temporary variable and then add a `sum_op` to add them up. + +For the debug convenience, if the final gradient name is `w@GRAD`, it's corresponding temporary variables will be named as `w@GRAD@RENAME@0`, `w@GRAD@RENAME@1`... + +See function `_addup_repetitive_outputs_` in `backward.py` for implementation details. + +#### No Gradient Variables + +In our framework, variables can be marked as *no_gradient*, it means that the gradient of this variable is unnecessary and can be considered as zero in model training. Apparently, when all the outputs of some `grad_op` are marked as *no_gradient*, the `grad_op` itself can be skipped in backward pass. + +Another situation is all the gradient inputs of some `grad_op` are marked as *no_gradient*, which means all of them can be considered as zeros. For `grad_op`s are in essence the propagation of gradients, all the outputs are definitely zeros when all gradient inputs are zeros. Therefore the `grad_op` can also be skipped. + +It should be noted that all these zero gradients still need to be creating and initialized by something, otherwise following `grad_op`s who take these gradients as inputs take the risk of using uninitialized memory. In our code, we employ `fill_zeros_like_op` to initialize them as all zeros. + +This features are implemented in function `_remove_no_grad_branch_`. It checks new created `grad_op`s one-by-one, removes who can be skipped and inserts `fill_zeros_like_op` when its necessary. We can get the `no_grad_set` from the `_append_backward_ops_` argument `no_grad_dict` or generate it on the fly by scanning all variables' `no_gradient` attribute(True or False). + +### Creating Backward Variables + +Up to now, we have completed all creating and adjusting jobs of `grad_op`s. However, backward variables have not been created. Now they are only represented by `grad_op`'s input and output arguments. The backward variable creating job will be done by: + +```python +def _append_backward_vars_(block, + start_op_idx, + grad_to_var, + grad_info_map): + """ + Create new variables required by backward pass. + + Args: + block(Block): the block where new variables will be created + start_op_idx(int): Only variables required by ops in block.ops[start_op_idx : ] will be created + grad_to_var(dict): + key(str): grad variable name + val(str): corresponding forward variable name + In most cases, this dict is generated by _append_backward_ops_() + grad_info_map(dict)(output argument): + key(str): forward variable name + val(tuple): a tuple of (str, int), str is the corresponding grad name, int is the block index + """ +``` + +Given a `block`, this function traverses all the `grad_op`s in it(The argument `start_op_idx` indicates where the grad_op sequence starts.) and creates all the uncreated outputs. The *pseudo-code* shows this process: + +``` +for op in block.ops[start_op_idx : ]: + + if op has an attribute named 'sub_block': + Get the sub-block(`s_block`) from op's attribute. + Invoke _append_backward_vars_(), with `block=s_block` + + for var_name in op.all_output_names(): + if block.has_var_recursive(var_name) or var_name is the name of empty variable: + continue + create a new variable named 'var_name' in block + if grad_to_var.has_key(var_name): + set grad_info_map[grad_to_var[var_name]] as a tuple of (var_name. block) + + do op's var type inference + do op's shape inference +``` diff --git a/doc/design/block.md b/doc/design/block.md index 4066122c0e8dfa33776796c3d205ba5aec9e0f52..fab7f2dc481ae51aa982164dc5048d90fcdc2b0b 100644 --- a/doc/design/block.md +++ b/doc/design/block.md @@ -291,10 +291,10 @@ public: } void Run(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const override { + const platform::Place& place) const override { PADDLE_ENFORCE(symbols_ready_, "operators and variables should be created first."); for (auto& op : runtime_table_.ops()) { - op->Run(scope, dev_ctx); + op->Run(scope, place); } } diff --git a/doc/design/ci_build_whl.png b/doc/design/ci_build_whl.png new file mode 100644 index 0000000000000000000000000000000000000000..232762b82a9ae3e979a1f38a7beb715c87438f40 Binary files /dev/null and b/doc/design/ci_build_whl.png differ diff --git a/doc/design/concurrent_programming.md b/doc/design/concurrent_programming.md new file mode 100644 index 0000000000000000000000000000000000000000..afc65e831d58ff427663806e56294292ccbef85b --- /dev/null +++ b/doc/design/concurrent_programming.md @@ -0,0 +1,163 @@ +# Design Doc: Concurrent Programming with Fluid + +With PaddlePaddle Fluid, users describe a program other than a model. The program is a [`ProgramDesc`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto) protobuf message. TensorFlow/MxNet/Caffe2 applications generate protobuf messages too, but their protobuf messages represent the model, a graph of operators, but not the program that trains/uses the model. + +Many know that when we program TensorFlow, we can specify the device on which each operator runs. This allows us to create a concurrent/parallel AI application. An interesting questions is **how does a `ProgramDesc` represents a concurrent program?** + +The answer relies on the fact that a `ProgramDesc` is similar to an abstract syntax tree (AST) that describes a program. So users just program a concurrent program that they do with any concurrent programming language, e.g., [Go](https://golang.org). + +## An Analogy + +The following table compares concepts in Fluid and Go + +| Go | Fluid | +|----|-------| +|user-defined functions | [layers](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/v2/fluid) | +| control-flow and built-in functions | [intrinsics/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators) | +| goroutines, channels | [class ThreadPool](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework/thread_pool.h) | +| runtime | [class Executor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.h) | + +## An Example Concurrent Program + +To review all above concepts in an example, let us take a simple program and writes its distributed version. + +Suppose that we want to parallelize a naive Fluid program (written in Go and calling Fluid's Go binding) that multiplies two tensors. + +```go +import "fluid" + +func paddlepaddle() { + X = fluid.read(...) + W = fluid.Tensor(...) + Y = fluid.mult(X, W) +} +``` + +Please be aware that the Fluid's Go binding provides the default `main` function, which calls the `paddlepaddle` function, which, in this case, is defined in above program and creates the following `ProgramDesc` message. + +```protobuf +message ProgramDesc { + block[0] = Block { + vars = [X, W, Y], + ops = [ + read(output = X) + assign(input = ..., output = W) + mult(input = {X, W}, output = Y) + ], + } +} +``` + +Then, the default `main` function calls `fluid.run()`, which creates an instance of the [`class Executor`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.h) and calls `Executor.Run(block[0])`, where `block[0]` is the first and only block defined in above `ProgramDesc` message. + +The default `main` function is defined as follows: + +```go +func main() { + paddlepaddle() + fluid.run() +} +``` + +## The Concurrent Version + +By parallelizing the above program, we could support very big tensor X by splitting into small pieces {x_1, x_2, ...} and sent each piece to worker process/node for parallel multiplication. + +In this case, we can write a transpiler that takes a `ProgramDesc` message that represents the above example program and outputs two `ProgramDesc` messages, one for running on the master process/node, and the other one for worker processes/nodes. + +### The Master Program + +The master program could look like the following: + +```protobuf +message ProgramDesc { + block[0] = Block { + vars = [X, L, Y], + ops = [ + read(output = X) + kube_get_workers_addrs(output = L) + Y = tensor_array(len(L)) + parallel_for(input = X, output = Y, + attrs = {L, block_id(1)}) # referring to block 1 + ] + } + + block[1] = Block { + parent = 0, + vars = [x, y, index], + ops = [ + slice(input = [X, index], output = x) # index is initialized by parallel_for + send(input = x, attrs = L[index]) + recv(outputs = y, attrs = L[index]) + assign(input = y, output = Y[index]) + ] + } +} +``` + +The equivalent Fluid program (calling the Go binding) is: + +```go +func main() { //// block 0 + X = fluid.read(...) + L = fluid.k8s.get_worker_addrs() + Y = fluid.tensor_array(len(L)) + fluid.parallel_for(X, L, + func(index int) { //// block 1 + x = X[index] + fluid.send(L[index], x) + y = fluid.recv(L[index]) + Y[index] = y + }) +} +``` + +An explanation of the above program: + +- `fluid.k8s` is a package that provides access to Kubernetes API. +- `fluid.k8s.get_worker_addrs` returns the list of IP and ports of all pods of the current job except for the current one (the master pod). +- `fluid.tensor_array` creates a [tensor array](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/lod_tensor_array.h). `fluid.parallel_for` creates a `ParallelFor` intrinsic, which, when executed, + + 1. creates `len(L)` scopes, each for the concurrent running of the sub-block (block 1 in this case), and initializes a variable named "index" in the scope to an integer value in the range `[0, len(L)-1]`, and + 2. creates `len(L)` threads by calling into the `ThreadPool` singleton, each thread + 1. creates an Executor instance, and + 2. calls `Executor.Run(block)`, where `block` is block 1 as explained above. +1. Please be aware that block 1 is a sub-block of block 0, so ops in block 1 could refer to variables defined in block 0. + +### The Worker Program + +The worker program looks like + +```go +func main() { + W = Tensor(...) + x = fluid.listen_and_do( + fluid.k8s.self_addr(), + func(input Tensor) { + output = fluid.mult(input, W) + }) +} +``` + +where + +- `fluid.listen_and_do` creates a `ListenAndDo` intrinsic, which, when executed, + 1. listens on the current pod's IP address, as returned by `fliud.k8s.self_addr()`, + 2. once a connection is established, + 1. creates a scope of two parameters, "input" and "output", + 2. reads a [Fluid variable](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/variable.h) and saves it into "input", + 3. creates an Executor instance and calls `Executor.Run(block)`, where the block is generated by running the lambda specified as the second parameter of `fluid.listen_and_do`. + +## Summarization + +From the above example, we see that: + +1. Fluid enables the imperative programming paradigm by: + 1. letting users describe a program, but not a model (a sequence of layers, or a graph of operators), and + 2. call the `fluid.run` function that runs the program implicitly. +1. The program is described as a `ProgramDesc` protobuf message. +2. Function `Executor.Run` takes a block, instead of a `ProgramDesc`, as its parameter. +3. `fluid.run` calls `Executor.Run` to run the first block in the `ProgramDesc` message. +4. `Executor.Run`'s implementation is extremely simple -- it doesn't plan the execution nor create threads; instead, it runs on the current thread and execute intrinsics/operators' `Run` method sequentially as they appear in the `Block.ops` array. +5. Intrinsics/operators' `Run` method might create threads. For example, the `ListenAndDo` operator creates a thread to handle each incoming request. +6. Threads are not necessarily OS thread; instead, they could be [green threads](https://en.wikipedia.org/wiki/Green_threads) managed by ThreadPool. Multiple green threads might run on the same OS thread. An example green threads is Go's [goroutines](https://tour.golang.org/concurrency/1). diff --git a/doc/design/images/control_flow_graph.png b/doc/design/images/control_flow_graph.png new file mode 100644 index 0000000000000000000000000000000000000000..3579998e58d07abc50bd3332128d4733a391cb3b Binary files /dev/null and b/doc/design/images/control_flow_graph.png differ diff --git a/doc/design/images/dataflow_equations.png b/doc/design/images/dataflow_equations.png new file mode 100644 index 0000000000000000000000000000000000000000..c10f7f69f4007952e5b0394edaa04efa1cfbb658 Binary files /dev/null and b/doc/design/images/dataflow_equations.png differ diff --git a/doc/design/images/deep_learning.png b/doc/design/images/deep_learning.png new file mode 100644 index 0000000000000000000000000000000000000000..026becc4d94e01e407dacb2a5314a0e5723334ff Binary files /dev/null and b/doc/design/images/deep_learning.png differ diff --git a/paddle/framework/images/duplicate_op.graffle b/doc/design/images/duplicate_op.graffle similarity index 100% rename from paddle/framework/images/duplicate_op.graffle rename to doc/design/images/duplicate_op.graffle diff --git a/paddle/framework/images/duplicate_op.png b/doc/design/images/duplicate_op.png similarity index 100% rename from paddle/framework/images/duplicate_op.png rename to doc/design/images/duplicate_op.png diff --git a/paddle/framework/images/duplicate_op2.graffle b/doc/design/images/duplicate_op2.graffle similarity index 100% rename from paddle/framework/images/duplicate_op2.graffle rename to doc/design/images/duplicate_op2.graffle diff --git a/paddle/framework/images/duplicate_op2.png b/doc/design/images/duplicate_op2.png similarity index 100% rename from paddle/framework/images/duplicate_op2.png rename to doc/design/images/duplicate_op2.png diff --git a/doc/design/images/profiler.png b/doc/design/images/profiler.png new file mode 100644 index 0000000000000000000000000000000000000000..d57b71ca88aaba5d05584a6219d84214e285a1e1 Binary files /dev/null and b/doc/design/images/profiler.png differ diff --git a/doc/design/memory_optimization.md b/doc/design/memory_optimization.md new file mode 100644 index 0000000000000000000000000000000000000000..00f514711a46bfd5af3bae51e0d9225ecc4c8998 --- /dev/null +++ b/doc/design/memory_optimization.md @@ -0,0 +1,217 @@ +# Memory Optimization + + +## Problem + +In a lecture from Andrew Ng, he attributes the recent sucess of AI due to a combination of these: + +- availability of Big Data +- supercomputing power to process this Big Data over very large neural networks +- modern algorithms + +Following graph shows the details: + +![](images/deep_learning.png) + +Larger model usually brings better performance. However, GPU memory is certain limited. For example, the memory size of a GTX TITAN X is only 12GB. To train complex and large model, we have to take care of memory using. Besides, memory optimization is also necessary in both online/mobile inference. + +## Solution + +### Basic Strategy + +There are some basic strategies to make memory optimization, including in-place operation and memory sharing. + +#### In-place Operation +In a relu activation operator: + +$y = \max(x, 0)$ + +If the variable x is not used in any other operator, we can make an in-place operation. In other words, the memory block of variable y and variable x are the same. In-place operation will save 50% memory occupancy immediately. + +#### Memory Sharing + +Not all operators support in-place operations. Memory sharing is a more general strategy. + +Following is an example: + +``` +a = op1(b, c); +d = op2(a) +e = op3(d, f) +``` + +In this case, variable a is no longer used, and op2 does not support in-place operation. After op2 finished, we can put the memory of variable a to a memory pool. Then, variable e can share the memory of variable a from the pool. + + +### Live Variable Analysis + +It's not enough to only have some basic strategies. The prerequisite of memory optimization is to know if a variable is still "live" after an operation. + +In our design, the neural network topology is defined as a program. Luckily, [live variable analysis](https://en.wikipedia.org/wiki/Live_variable_analysis) is a classic problem in compilers which can be used in many stages, such as register allocation. + +In compilers, the front end of the compilers translates programs into an intermediate language with an unbounded number of temporaries. This program must run on a machine with a bounded number of registers. Two temporaries a and b can fit into the same register, if a and b are never "in use" at the same time. Thus, many temporaries can fit in few registers; if they don't all fit, the excess temporaries can be kept in memory. + +Therefore, the compiler needs to analyze the intermediate-representation program to determine which temporaries are in use at the same time. We say a variable is "live" if it holds a value that may be needed in the future, so this analysis is called liveness analysis. + +We can leran these techniques from compilers. There are mainly two stages to make live variable analysis: + +- construct a control flow graph +- solve the dataflow equations + + +#### Control Flow Graph +To preform analyses on a program, it is often useful to make a control flow graph. A [control flow graph](https://en.wikipedia.org/wiki/Control_flow_graph) (CFG) in computer science is a representation, using graph notation, of all paths that might be traversed through a program during its execution. Each statement in the program is a node in the flow graph; if statemment x can be followed by statement y, there is an egde from x to y. + +Following is the flow graph for a simple loop. + +![](images/control_flow_graph.png) + +#### Dataflow Analysis + +liveness of variable "flows" around the edges of the control flow graph; determining the live range of each variable is an example of a dataflow problem. [Dataflow analysis](https://en.wikipedia.org/wiki/Data-flow_analysis) is a technique for gathering information about the possible set of values calculated at various points in a computer program. + +A simple way to perform data-flow analysis of programs is to set up dataflow equations for each node of the control flow graph and solve them by repeatedly calculating the output from the input locally at each node until the whole system stabilizes. + +- Flow Graph Terminology + +A flow graph node has out-edges that lead to sucessor nodes, and in-edges that come from presucessor nodes. The set *pred[n]* is all the predecessors of node n, and *succ[n]* is the set of sucessors. +In former control flow graph, the out-edges of node 5 are 5 --> 6 and 5 --> 2, and *succ[5]* = {2, 6}. The in-edges of 2 are 5 --> 2 and 1 --> 2, and *pred[2]* = {1, 5}. + +- Uses and Defs + +An assignmemt to a variable or temporary defines that variable. An occurence of a variable on the right-hand side of an assginment(or in other expressions) uses the variable. We can speak the *def* of a variable as the set of graph nodes that define it; or the *def* of a graph node as the set of variables that it defines; and the similarly for the *use* of a variable or graph node. In former control flow graph, *def(3)* = {c}, *use(3)* = {b, c}. + +- Liveness + +A variable is *live* on an edge if there is a directed path from that edge to a *use* of the variable that does not go through any *def*. A variable is *live-in* at a node if it is live on any of the in-edges of that node; it is *live-out* at a node if it is live on any of the out-edges of the node. + + +The calcution of liveness can be solved by iteration until a fixed pointer is reached. Following is the recursive formula: + +![](images/dataflow_equations.png) + +### Memory optimization transpiler + +At last, we take basic strategy and liveness analysis techniques learning from compilers to implement our memory optimization transpiler. + +#### add in-place attribute + +In-place is a built-in attribute of an operator. Since we treat in-place and other operators differently, we have to add an in-place attribute for every operator. + + +#### contruct control flow graph + +Following is the ProgramDesc protobuf of [machine translation](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/tests/book/test_machine_translation.py) example. + +- Block0: + +``` +lookup_table +mul +... +while(sub-block idx 1) +... +array_to_lod_tensor +cross_entropy +... +while_grad(sub-block idx 2) +read_from_array +array_to_lod_tensor +... +``` + +- Block1 + +``` +read_from_array +read_from_array +... +write_to_array +increment +write_to_array +less_than +``` + +- Block2 + +``` +read_from_array +increment +... +write_to_array +write_to_array +``` + +We can transfer all the operators and variables in ProgramDesc to build a control flow graph. + +```python +class ControlFlowGraph(object): + def __init__(self, Program): + self._sucessors = defaultdict(set) + self._presucessors = defaultdict(set) + self._uses = defaultdict(set) + self._defs = defaultdict(set) + self._live_in = defaultdict(set) + self._live_out = defaultdict(set) + self._program = Program + + def build(self): + pass + + def dataflow_analysis(self): + pass + + def memory_optimization(self): + pass + + def get_program(self): + return self._program +``` + +#### make dataflow analysis + +We follow guide from compilers and try to solve the dataflow equation to get liveness of every variable. If the live-in of an operator node is different from the live-out, then we can make memory sharing. + +For example: + +``` +a = op1(b, c); +d = op2(a) +e = op3(d, f) +``` + +The dataflow analysis result is: + +``` +live_in(op1) = {b, c, f} +live_out(op1) = {a, f} + +live_in(op2) = {a, f} +live_out(op2) = {d, f} + +live_in(op3) = {d, f} +live_out(op3) = {} +``` + +After op1, we can process variable b and variable c; After op2, we can process variable a. After op3, we can process variable d and variable f. + +#### memory sharing policy + +A memory pool will be mantained in the stage of memory optimization. Each operator node will be scanned to determine memory optimization is done or not. If an operator satifies the requirement, following policy will be taken to handle input/output variables. + +``` +if op.support_inplace(): + i --> pool + pool --> o +else: + pool --> o + i --> pool +``` + + + +## Reference + +- [Lecture Notes From Artificial Intelligence Is The New Electricity By Andrew Ng](https://manavsehgal.com/lecture-notes-from-artificial-intelligence-is-the-new-electricity-by-andrew-ng-4712dcbf26e5) +- Modern compiler implementation in ML, by Andrew W. Appel +- [Optimizing Memory Consumption in Deep learning](https://mxnet.incubator.apache.org/architecture/note_memory.html) diff --git a/doc/design/mkl/mkldnn_fluid.md b/doc/design/mkl/mkldnn_fluid.md new file mode 100644 index 0000000000000000000000000000000000000000..bef126f3f0577b69f646dfe5d10539b372c6a8a5 --- /dev/null +++ b/doc/design/mkl/mkldnn_fluid.md @@ -0,0 +1,149 @@ +# Design Doc: Add MKLDNN Kernel in Fluid Operator + +## Principles + +First of all, we should follow some basical principles like: +1. [How to write a new operator](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/new_op_en.md). We are trying to add a new kind of kernel into operators, so basically we should follow this doc. +2. [Supporting new Device/Library](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/support_new_device.md). Since MKLDNN is a new library to fluid, we should add `MKLDNNDeviceContext` and maybe `mkldnn_helper.h`, just like [cudnn_helper.h](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/cudnn_helper.h). +3. [Switch Kernel](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/switch_kernel.md). Another important point is that we should ensure the data synchronization between different kernel types, which is this [topic](https://github.com/PaddlePaddle/Paddle/issues/6549). So basically we should override `GetExpectedKernelType` and `trans` functions to support switching kernels. +4. [The Keys of Operator Kernel Type](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/operator_kernel_type.md). Kernel Type is a pivotal conception which can record the `Place`, `Library`, `DataType` and `Layout`. + +## Sulution + +In general, there are four parts we should follow to run a MKL-DNN primitive. +- Create a primitive descriptor that describe this operator +- Create a primitive itself by primitive descriptor and the engine +- Create all memory buffers that primitive needed +- Launch a stream to execute the primitive created +More details can refer to [here](http://01org.github.io/mkl-dnn). + +It's better to avoid reinitialization of primitives and memory handles in the first three stages in every iteration. \ +So we plan to create a map to record all the `primitive` and `memory`, which should not take too much memories as discussed [here](https://github.com/PaddlePaddle/Paddle/issues/6822). + +It's assumed that following three conditions should be satisfied. +1. there is a unique key for each operator instance. May be the actual name of `Output Tensor`. +2. the `Input Tensor` inside `Compute` function is the one after converted. +3. we can get the phase(eg. `is_test`) inside `Compute` function, otherwise we need to expose this attribue to user. + +### Compute +The algorithm of `Compute` would be described as follow, let's take conv like an example. + +```c++ + + PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()), "It must use CPUPlace."); + PADDLE_ENFORCE(platform::is_mkldnn_library(ctx.GetLibrary()), "It must use MKLDNN Library."); + + auto& dev_ctx = ctx.template device_context(); + + // find primitive by unique key from mkldnn context + // the op_key should be a unique name of this op instance + auto& p = dev_ctx.findPrimitive(op_key + "_fwd"); + + // assuming the input tensor inside this compute function is the one after converted + // this point should be guarantee by another mechanism + auto& i = dev_ctx.findMemory(op_key + "_input"); + + if (p == nullptr || i == nullptr || inputSizeChanged(p, i)) { + auto fwd_primitive_desc = createPrimitiveDesc(ctx); + auto* input = ctx.Input("Input"); + auto* filter = ctx.Input("Filter"); + auto* output = ctx.Output("Output"); + shared_ptr in(new mkldnn::memory(fwd_primitive_desc->src_primitive_desc(), input->data())); + shared_ptr wgt(new mkldnn::memory(fwd_primitive_desc->weights_primitive_desc(), filter->data())); + shared_ptr out(new mkldnn::memory(fwd_primitive_desc->dst_primitive_desc(), output->mutable_data(ctx.GetPlace()))); + shared_ptr fwd_primitive(new mkldnn::conv_fwd(*fwd_primitive_desc, *in, *wgt, *out)); + + dev_ctx.addMemory(op_key+"_input", in); + dev_ctx.addMemory(op_key+"_output", out); + dev_ctx.addMemory(op_key+"_filer", wgt); + dev_ctx.addPrimitive(op_key+"_fwd", fwd_primitive); + dev_ctx.addPrimitiveDesc(op_key+"_fwd_PD", fwd_primitive_desc); + } + + p = dev_ctx.findPrimitive(op_key + "_fwd"); + + PADDLE_ENFORCE(p, "Should have forward Primitive"); + PADDLE_ENFORCE(dev_ctx.findMemory(op_unique_key+"_input"), "Should have input memory"); + PADDLE_ENFORCE(dev_ctx.findMemory(op_unique_key+"_output"), "Should have output memory"); + PADDLE_ENFORCE(dev_ctx.findMemory(op_unique_key+"_filter"), "Should have filter memory"); + PADDLE_ENFORCE(dev_ctx.findPrimitiveDesc(op_unique_key+"_fwd_PD"), "Should have forward PrimitiveDesc"); + dev_ctx.submit(p); + dev_ctx.execute(); // the convert primitive should have already contained. + +``` + +The `createPrimitiveDesc` returns the primitive descripotor of this operator, would be like this: +```c++ + auto* input = ctx.Input("Input"); + auto* filter = ctx.Input("Filter"); + auto* output = ctx.Output("Output"); + std::vector strides = ctx.Attr>("strides"); + std::vector paddings = ctx.Attr>("paddings"); + std::vector dilations = ctx.Attr>("dilations"); + int groups = ctx.Attr("groups"); + algorithm algo = static_cast(ctx.Attr("convolution_algorithm_option")); + prop_kind pk = ctx.Attr("is_test") ? prop_kind::forward_inference : prop_kind::forward_training; + + auto fwd_desc = mkldnn::conv_fwd::desc(/* all the setting above*/); + shared_ptr fwd_primitive_desc(new mkldnn::conv_fwd::primitive_desc(fwd_desc, ctx.getEngine())); + + return fwd_primitive_desc; + } +``` + +### MKLDNNDeviceContext +`MKLDNNDeviceContext`, which is very straightforward, should contain some base information like: `stream`, `engine` and the map needed. + + +### mkldnn_helper +Some functions would be put in `paddle/platform/mkldnn_helper.h`. +- create MKLDNN memories +- create MKLDNN primitives +- error check function +- etc + + +### Kernel Switch +We should `reorder` the different Layout from other device or to other device. `GetExpectedKernelType` and `trans` functions can help us to implement it. + +`GetExpectedKernelType` should get the context, and this operator can return the best `KernelType`. +`trans` would be like this: + +```c++ +void trans(inputs, ctx) override { + if (NoNeedTrans()) { + return; + } + // find reorder primitive by op_key from context + auto& dev_ctx = ctx.template device_context(); + auto& p = dev_ctx.findPrimitive(op_key + "_reorder_input"); + auto& i = dev_ctx.findMemory(op_key + "_src_input"); + + if (p == nullptr || i == nullptr || changeSized(i, input)) { + auto prim = createPrimitiveDesc(ctx); + auto src = createMemory(memoryDesc(input->dims(), actual_layout), input->data); + auto newbuffer = paddle::memory::Alloc(ctx.GetPlace(), input->size_in_bytes()); + auto dst = createMemory(p->expected_desc(), newbuffer->data); + auto reorder_primitive(new mkldnn::reorder(src, dst)); + + dev_ctx.addMemory(op_key+"_src_input", src); + dev_ctx.addMemory(op_key+"_input", dst); + dev_ctx.addPrimitive(op_key+"_reorder_input", reorder_primitive); + } + + p = dev_ctx.findPrimitive(op_key + "_reorder_input"); + PADDLE_ENFORCE(p, "Should have Reorder Primitive"); + dev_ctx.submit(p); + if (! this->isMKLDNNKernel()) { + // execute immediately only if this is not mkldnn kernel function. + // otherwise, it can be executed with the operator primitive in Compute + dev_ctx.stream(); + } + // after submit, the input tensor in ExecutionContext should be changed as the converted one + // there should be another mechanism to ensure this +} +``` + +### Unit Test +All the functions should be tested corresponding. +TBD diff --git a/doc/design/optimizer.md b/doc/design/optimizer.md index 202b4b65103c0b7c536a9cb466c4120ce134d8c3..691081c268b848811bf5ee6d6a41edfe0f47eec0 100644 --- a/doc/design/optimizer.md +++ b/doc/design/optimizer.md @@ -79,7 +79,7 @@ class Optimizer(object): def minimize(self, loss, parameter_list): """Add operations to minimize `loss` by updating `parameter_list`. - This method combines interface `append_backward_ops()` and + This method combines interface `append_backward()` and `create_optimization_pass()` into one. """ params_grads = self.create_backward_pass(loss, parameter_list) diff --git a/doc/design/profiler.md b/doc/design/profiler.md new file mode 100644 index 0000000000000000000000000000000000000000..b20b5efdc1f1f10ce7cec835adcc6fb374ed4e20 --- /dev/null +++ b/doc/design/profiler.md @@ -0,0 +1,97 @@ +## Introduction + +There are many performance analysis tools for [different programming languages and different software frameworks](https://en.wikipedia.org/wiki/List_of_performance_analysis_tools). For most popular deep learning frameworks, they use several programming languages and adapt to heterogeneous platforms. Similar to most of the deep learning frameworks, PaddlePaddle also uses C++, CUDA and Python as the basic programming languages to adapt to run on CPU and GPU devices. The [`nvprof` tools](http://docs.nvidia.com/cuda/profiler-users-guide/index.html#nvprof-overview) is usually used to analyse the CUDA program. We have [a document](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/optimization/cpu_profiling.md) to profile CPU and Python program by [yep](https://pypi.python.org/pypi/yep) and [Google's perftools](https://github.com/google/pprof) to profile only the CPU and Python program. But for [PaddlePaddle fluid](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/fluid.md), the operator is the basic computing unit. The developers usually want to collect the time of each operator and locate bottlenecks. The `nvprof` usually collect the timeline of CUDA-related activities on both CPU and GPU, including kernel execution, memory transfers, memory set and CUDA API calls and events or metrics for CUDA kernels. And the `yep` and `Google's perftools` can't collect the timeline for CUDA program. All these tools can't collect time in the operator level. So we design this profiling tool. + +## Architecture + +The work flow for most task is as follows. Each operator will run many times in the all iterations. So the profiler must collect the total time of each operator during the iteration. For more, sometimes, the developers may want to collect more detailed time span inside the operator or record time span for elsewhere, this requires that the profiler must support to record the nested time span. And in order to speedup training, all the deep learning frameworks support parallel computing, including multiple threads on CPU and multiple GPUs. So the profiler must be able to collect the timeline for each thread. In addition, the profiler also occupies certain resources. It must can be easily to be enabled or disabled by the developers. At last, the profiler should present a human-readable report. + +```python +for i in xrange(M): # M is the iteration number + for op in operator_lists: # The `operator_lists` contains all the operators in the network. + op.run(); +``` + +In summary, the proflier should have following features: + +- records time span in loop. +- supports nested time span. +- supports multiple threads/multiple GPUs. +- supports to be enabled and disabled by users. + +But how to record the time for the mixed C++ and CUDA program? There many C++ APIs to get the current calendar time in host program. But for GPU, the CUDA kernels may be executed concurrently if they are in different [streams](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#streams) and the CUDA kernels is asynchronous with the host program if there is no the synchronous aftern the CUDA kernels. CUDA provides [event](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#events) to monitor the device and perform accurate timing. Inspired by PyTorch and CUDA event, we also design and apply the events to record the timeline. Then summarize and present statistics based on these events. + +The overall flow is shown as the following figure. + +
+ +### Event + +In above work flow, a pair of events are needed before and after the piece of code to collect time. So the event has a flag to mark whether it is a starting event or an ending event. Except this two kinds of event, sometime, a only marker with a text message is needed, for example, a marker to specify the profiling start or end. There are three kinds of event: + +```c++ +enum EventKind { + kMark, + kPushRange, + kPopRange}; +``` +- kMark: only a marker without time range. +- kPushRange: mark the starting event for time range. +- kPopRange: mark the ending event for time range. + +For the CPU code, the events only need to record the current time. For the CUDA code, the [event management functions of CUDA](http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html#group__CUDART__EVENT) are used. For many pieces of code, an event lists are used to record each piece. + +```c++ +class Event { + public: + // The DeviceContext is used to get current CUDA stream. + Event(EventKind kind, std::string name, uint32_t thread_id, + const platform::DeviceContext* dev_ctx = nullptr); + double CpuElapsedUs(const Event& e) const; + double CudaElapsedUs(const Event& e) const; + + private: + EventKind kind_; + std::string name_; + uint32_t thread_id_; + int64_t cpu_ns_; +#ifdef PADDLE_WITH_CUDA + cudaEvent_t event_ = nullptr; + int device_ = -1; +#endif +}; + +struct EventList { + std::forward_list> event_blocks; +}; +``` + +As mentioned above, there is no need to record the timeline when disabling the profiler. So there is a global state to enable or disable the profiler. + +```c++ +enum ProfilerState { + kDisabled, + kCPU, + kCUDA +}; +ProfilerState g_state; +``` +- kDisabled: the disabled state. +- kCPU: CPU profiling state. +- kCUDA: GPU profiling state. + +A pair of starting and ending events are pushed to event lists in constructor and destructor of `RecordEvent`. So the timeline is recorded for the code in the lifecycle of an object of `RecordEvent`. + +```c++ +struct RecordEvent { + explicit RecordEvent(const std::string name, + platform::DeviceContext* dev_ctx = nullptr) { + if (kState == ProfilerState::kDisabled) return; + // push the starting event to the event lists. + } + ~RecordEvent() { + if (kState == ProfilerState::kDisabled) return; + // push the ending event to the event lists. + } +}; +``` diff --git a/doc/design/releasing_process.md b/doc/design/releasing_process.md index 14c081ea84282e52a2e36475c3c0ea755122d154..b9787261092f1f27377886152cb1596d9ff54188 100644 --- a/doc/design/releasing_process.md +++ b/doc/design/releasing_process.md @@ -7,11 +7,9 @@ PaddlePaddle每次发新的版本,遵循以下流程: 1. 从`develop`分支派生出新的分支,分支名为`release/版本号`。例如,`release/0.10.0` 1. 将新分支的版本打上tag,tag为`版本号rc.Patch号`。第一个tag为`0.10.0rc1`,第二个为`0.10.0rc2`,依次类推。 1. 对这个版本的提交,做如下几个操作: + * 使用Regression Test List作为检查列表,测试本次release的正确性。 + * 如果失败,记录下所有失败的例子,在这个`release/版本号`分支中,修复所有bug后,Patch号加一,到第二步 * 修改`python/setup.py.in`中的版本信息,并将`istaged`字段设为`True`。 - * 编译这个版本的Docker发行镜像,发布到dockerhub。如果失败,修复Docker编译镜像问题,Patch号加一,返回第二步 - * 编译这个版本的Ubuntu Deb包。如果失败,修复Ubuntu Deb包编译问题,Patch号加一,返回第二步。 - * 使用Regression Test List作为检查列表,测试Docker镜像/ubuntu安装包的功能正确性 - * 如果失败,记录下所有失败的例子,在这个`release/版本号`分支中,修复所有bug后,Patch号加一,返回第二步 * 编译这个版本的python wheel包,并发布到pypi。 * 由于pypi.python.org目前遵循[严格的命名规范PEP 513](https://www.python.org/dev/peps/pep-0513),在使用twine上传之前,需要重命名wheel包中platform相关的后缀,比如将`linux_x86_64`修改成`manylinux1_x86_64`。 * pypi上的package名称为paddlepaddle和paddlepaddle_gpu,如果要上传GPU版本的包,需要修改build/python/setup.py中,name: "paddlepaddle_gpu"并重新打包wheel包:`python setup.py bdist_wheel`。 @@ -21,8 +19,8 @@ PaddlePaddle每次发新的版本,遵循以下流程: pip install twine twine upload dist/[package to upload] ``` + * 编译这个版本的Docker发行镜像,发布到dockerhub。如果失败,修复Docker编译镜像问题,Patch号加一,返回第二步 1. 第三步完成后,将`release/版本号`分支合入master分支,并删除`release/版本号`分支。将master分支的合入commit打上tag,tag为`版本号`。同时再将`master`分支合入`develop`分支。最后删除`release/版本号`分支。 -1. 编译master分支的Docker发行镜像,发布到dockerhub。编译ubuntu的deb包,发布到github release页面 1. 协同完成Release Note的书写 @@ -31,6 +29,30 @@ PaddlePaddle每次发新的版本,遵循以下流程: * `release/版本号`分支一旦建立,一般不允许再从`develop`分支合入`release/版本号`。这样保证`release/版本号`分支功能的封闭,方便测试人员测试PaddlePaddle的行为。 * 在`release/版本号`分支存在的时候,如果有bugfix的行为,需要将bugfix的分支同时merge到`master`, `develop`和`release/版本号`这三个分支。 +## 发布wheel包到pypi + +使用[PaddlePaddle CI](https://paddleci.ngrok.io/project.html?projectId=Manylinux1&tab=projectOverview) +完成自动化二进制编译,参考下图,选择需要发布的版本(通常包含一个CPU版本和一个GPU版本),点击"run"右侧的"..."按钮,可以 +弹出下面的选择框,在第二个tab (Changes)里选择需要发布的分支,这里选择0.11.0,然后点击"Run Build"按钮。等待编译完成后 +可以在此页面的"Artifacts"下拉框中找到生成的3个二进制文件,分别对应CAPI,`cp27m`和`cp27mu`的版本。然后按照上述的方法 +使用`twine`工具上传即可。 + + + +* 注:CI环境使用 https://github.com/PaddlePaddle/buildtools 这里的DockerImage作为编译环境以支持更多的Linux + 发型版,如果需要手动编译,也可以使用这些镜像。这些镜像也可以从 https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/ 下载得到。 +* pypi不支持覆盖上传,所以一个版本号的wheel包发布之后,不可以更改。下一个wheel包需要更新版本号才可以上传。 + +## 发布Docker镜像 + +上述PaddlePaddle CI编译wheel完成后会自动将Docker镜像push到DockerHub,所以,发布Docker镜像只需要对自动push的镜像打上 +版本号对应的tag即可: + +1. 进入 https://hub.docker.com/r/paddlepaddle/paddle/tags/ 查看latest tag的更新时间是否在上述编译wheel包完成后是否最新。 +1. 执行 `docker pull paddlepaddle/paddle:[latest tag]`,latest tag可以是latest或latest-gpu等。 +1. 执行 `docker tag paddlepaddle/paddle:[latest tag] paddlepaddle/paddle:[version]` +1. 执行 `docker push paddlepaddle/paddle:[version]` + ## PaddlePaddle 分支规范 PaddlePaddle开发过程使用[git-flow](http://nvie.com/posts/a-successful-git-branching-model/)分支规范,并适应github的特性做了一些区别。 diff --git a/doc/design/support_new_device.md b/doc/design/support_new_device.md index fd23dc211a35fdc9d87bc9233fcf4e90254da748..4c5f10e2ecb9ec09b78926ca27552741d02d7cc9 100644 --- a/doc/design/support_new_device.md +++ b/doc/design/support_new_device.md @@ -25,13 +25,14 @@ There are mainly three parts that we have to consider while integrating a new de ### Place and DeviceContext +Please remind that device and computing library are not one-to-one corresponding. A device can have a lot of computing libraries and a computing library can also support several devices. #### Place -Fluid uses class [Place](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h#L55) to represent different devices and computing libraries. There are inheritance relationships between different kinds of `Place`. +Fluid uses class [Place](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h#L55) to represent the device memory where data is located. If we add another device, we have to add corresponding `DevicePlace`. ``` - | CPUPlace --> MKLDNNPlace -Place --| CUDAPlace --> CUDNNPlace + | CPUPlace +Place --| CUDAPlace | FPGAPlace ``` @@ -43,12 +44,12 @@ typedef boost::variant Place; #### DeviceContext -Fluid uses class [DeviceContext](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/device_context.h#L30) to manage the resources in different hardwares, such as CUDA stream in `CDUADeviceContext`. There are also inheritance relationships between different kinds of `DeviceContext`. +Fluid uses class [DeviceContext](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/device_context.h#L30) to manage the resources in different libraries, such as CUDA stream in `CDUADeviceContext`. There are also inheritance relationships between different kinds of `DeviceContext`. ``` - /-> CPUDeviceContext --> MKLDeviceContext -DeviceContext ----> CUDADeviceContext --> CUDNNDeviceContext + /-> CPUDeviceContext +DeviceContext ----> CUDADeviceContext \-> FPGADeviceContext ``` @@ -78,16 +79,6 @@ private: }; ``` -- CUDNNDeviceContext - -``` -class CUDNNDeviceContext : public CUDADeviceContext { - private: - cudnnHandle_t cudnn_handle_; -}; -``` - - ### Memory and Tensor @@ -106,7 +97,7 @@ template size_t Used(Place place); ``` -To implementing these interfaces, we have to implement MemoryAllocator for different Devices +To implement these interfaces, we have to implement MemoryAllocator for different Devices. #### Tensor @@ -243,6 +234,7 @@ REGISTER_OP_CUDA_KERNEL( Generally, we will impelement OpKernel for all Device/Library of an Operator. We can easily train a Convolutional Neural Network in GPU. However, some OpKernel is not sutibale on a specific Device. For example, crf operator can only run on CPU, whereas most other operators can run at GPU. To achieve high performance in such circumstance, we have to switch between different Device/Library. -We will discuss how to implement an efficient OpKernel switch policy. +For more details, please refer to following docs: -- TBD +- operator kernel type [doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/operator_kernel_type.md) +- switch kernel [doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/switch_kernel.md) diff --git a/doc/faq/build_and_install/index_cn.rst b/doc/faq/build_and_install/index_cn.rst index a2bdeead7841393fdfe90c78e5b91d9e61678a24..ed8a0c7e87da133138ecfc7ba6a8217d58b8f71d 100644 --- a/doc/faq/build_and_install/index_cn.rst +++ b/doc/faq/build_and_install/index_cn.rst @@ -109,3 +109,31 @@ PaddlePaddle使用avx SIMD指令提高cpu执行效率,因此错误的使用二 解决办法是: * 卸载PaddlePaddle包 :code:`pip uninstall paddle`, 清理掉老旧的PaddlePaddle安装包,使得单元测试有一个干净的环境。如果PaddlePaddle包已经在python的site-packages里面,单元测试会引用site-packages里面的python包,而不是源码目录里 :code:`/python` 目录下的python包。同时,即便设置 :code:`PYTHONPATH` 到 :code:`/python` 也没用,因为python的搜索路径是优先已经安装的python包。 + +8. 下载MKLML库失败 +------------------ + +.. code-block:: bash + + make[2]: *** [third_party/mklml/src/extern_mklml-stamp/extern_mklml-download] 错误 4 + make[1]: *** [CMakeFiles/extern_mklml.dir/all] 错误 2 + make[1]: *** 正在等待未完成的任务.... + +原因:网速或SSL链接原因,导致MKLML库下载不成功。 + +解决办法是:手动下载并安装,具体步骤如下。 + +.. code-block:: bash + + // 1. 进入对应的目录 + cd build/third_party/mklml/src/extern_mklml + + // 2. 查看包的大小, 正常情况下是75M,如果小于75M,即下载失败: + du -sh mklml_lnx_2018.0.1.20171007.tgz + + // 3. 手动下载且解压缩,并手动生成download成功标签: + wget --no-check-certificate https://github.com/01org/mkl-dnn/releases/download/v0.11/mklml_lnx_2018.0.1.20171007.tgz -c -O mklml_lnx_2018.0.1.20171007.tgz + tar zxf mklml_lnx_2018.0.1.20171007.tgz + touch ../extern_mklml-stamp/extern_mklml-download + + // 4. 接着编译即可 diff --git a/doc/getstarted/build_and_install/docker_install_cn.rst b/doc/getstarted/build_and_install/docker_install_cn.rst index fa1b6a372728ccac128d2e6e79a6514b8884ea3f..bae42593ddc6f7a7eb47d603752ad6efa9820b45 100644 --- a/doc/getstarted/build_and_install/docker_install_cn.rst +++ b/doc/getstarted/build_and_install/docker_install_cn.rst @@ -15,7 +15,7 @@ 获取PaddlePaddle的Docker镜像 ------------------------------ -执行下面的命令获取最新的PaddlePaddle Docker镜像 +执行下面的命令获取最新的PaddlePaddle Docker镜像,版本为cpu_avx_mkl: .. code-block:: bash @@ -27,7 +27,7 @@ docker pull docker.paddlepaddle.org/paddle -下载GPU版本的Docker镜像: +下载GPU版本(cuda8.0_cudnn5_avx_mkl)的Docker镜像: .. code-block:: bash @@ -54,7 +54,7 @@ .. _docker_run: 在Docker中执行PaddlePaddle训练程序 ------------------------------- +---------------------------------- 假设您已经在当前目录(比如在/home/work)编写了一个PaddlePaddle的程序 :code:`train.py` (可以参考 `PaddlePaddleBook `_ @@ -82,7 +82,7 @@ .. _docker_run_book: 使用Docker启动PaddlePaddle Book教程 ------------------------------- +----------------------------------- 使用Docker可以快速在本地启动一个包含了PaddlePaddle官方Book教程的Jupyter Notebook,可以通过网页浏览。 PaddlePaddle Book是为用户和开发者制作的一个交互式的Jupyter Notebook。 diff --git a/doc/getstarted/build_and_install/docker_install_en.rst b/doc/getstarted/build_and_install/docker_install_en.rst index 06012bf65e75c32957516f6b7f62e09480871b84..56a7c68e4d39c45249fa55a964dc48b7081596a6 100644 --- a/doc/getstarted/build_and_install/docker_install_en.rst +++ b/doc/getstarted/build_and_install/docker_install_en.rst @@ -16,7 +16,7 @@ After you've read above tutorials you may proceed the following steps. Pull PaddlePaddle Docker Image ------------------------------ -Run the following command to download the latest Docker images: +Run the following command to download the latest Docker images, the version is cpu_avx_mkl: .. code-block:: bash @@ -28,7 +28,7 @@ For users in China, we provide a faster mirror: docker pull docker.paddlepaddle.org/paddle -Download GPU version images: +Download GPU version (cuda8.0_cudnn5_avx_mkl) images: .. code-block:: bash @@ -58,7 +58,7 @@ and run: .. _docker_run: Launch your training program in Docker ------------------------------- +-------------------------------------- Assume that you have already written a PaddlePaddle program named :code:`train.py` under directory :code:`/home/work` (refer to diff --git a/doc/getstarted/build_and_install/pip_install_cn.rst b/doc/getstarted/build_and_install/pip_install_cn.rst index a4587f82a984acf243f49834e707fcd66d5b1252..0c741e936b46eda5e7165e4ee54b545b14a28a19 100644 --- a/doc/getstarted/build_and_install/pip_install_cn.rst +++ b/doc/getstarted/build_and_install/pip_install_cn.rst @@ -11,14 +11,14 @@ PaddlePaddle可以使用常用的Python包管理工具 ------------------------------ -执行下面的命令即可在当前机器上安装PaddlePaddle的运行时环境,并自动下载安装依赖软件。 +执行下面的命令即可在当前机器上安装PaddlePaddle的运行时环境,并自动下载安装依赖软件,版本为cpu_avx_openblas。 .. code-block:: bash pip install paddlepaddle -如果需要安装支持GPU的版本,需要执行: +如果需要安装支持GPU的版本(cuda7.5_cudnn5_avx_openblas),需要执行: .. code-block:: bash diff --git a/doc/getstarted/build_and_install/pip_install_en.rst b/doc/getstarted/build_and_install/pip_install_en.rst index 55e31560a0f5087ab69966a6281c6c8573c04204..285ed09805b09790beaef014f6813c227aff33ac 100644 --- a/doc/getstarted/build_and_install/pip_install_en.rst +++ b/doc/getstarted/build_and_install/pip_install_en.rst @@ -12,14 +12,14 @@ Install Using pip ------------------------------ Run the following command to install PaddlePaddle on the current -machine, it will also download requirements. +machine, it will also download requirements, the version is cpu_avx_openblas. .. code-block:: bash pip install paddlepaddle -If you wish to install GPU version, just run: +If you wish to install GPU version (cuda7.5_cudnn5_avx_openblas), just run: .. code-block:: bash diff --git a/doc/getstarted/index_cn.rst b/doc/getstarted/index_cn.rst index a9087be6f350c5656cabb0c64ba0f200d1c666cc..9f6ee25987d51dcca3a37cf0f62a70a5a5a2d89a 100644 --- a/doc/getstarted/index_cn.rst +++ b/doc/getstarted/index_cn.rst @@ -7,13 +7,13 @@ ++++++++ PaddlePaddle支持使用pip快速安装,目前支持CentOS 6以上, Ubuntu 14.04以及MacOS 10.12,并安装有Python2.7。 -执行下面的命令完成快速安装: +执行下面的命令完成快速安装,版本为cpu_avx_openblas: .. code-block:: bash pip install paddlepaddle -如果需要安装支持GPU的版本,需要执行: +如果需要安装支持GPU的版本(cuda7.5_cudnn5_avx_openblas),需要执行: .. code-block:: bash diff --git a/doc/getstarted/index_en.rst b/doc/getstarted/index_en.rst index d14e3f5c0cc90792fce9cb82e65da482c44dc433..063d9d880c82550f7f5d47d3d0b1fff59865bca7 100644 --- a/doc/getstarted/index_en.rst +++ b/doc/getstarted/index_en.rst @@ -8,13 +8,13 @@ Quick Install You can use pip to install PaddlePaddle with a single command, supports CentOS 6 above, Ubuntu 14.04 above or MacOS 10.12, with Python 2.7 installed. -Simply run the following command to install: +Simply run the following command to install, the version is cpu_avx_openblas: .. code-block:: bash pip install paddlepaddle -If you need to install GPU version, run: +If you need to install GPU version (cuda7.5_cudnn5_avx_openblas), run: .. code-block:: bash diff --git a/doc/mobile/cross_compiling_for_android_cn.md b/doc/mobile/cross_compiling_for_android_cn.md index 424d7718c64438496cf0895397babd5408e1ca02..ae24ced770492743065e37654b494caf6b4c5bc0 100644 --- a/doc/mobile/cross_compiling_for_android_cn.md +++ b/doc/mobile/cross_compiling_for_android_cn.md @@ -1,8 +1,9 @@ # Android平台编译指南 用户可通过如下两种方式,交叉编译Android平台上适用的PaddlePaddle库: -- 基于Docker容器的编译方式 -- 基于Linux交叉编译环境的编译方式 + +- [基于Docker容器的编译方式](#基于docker容器的编译方式) +- [基于Linux交叉编译环境的编译方式](#基于linux交叉编译环境的编译方式) ## 基于Docker容器的编译方式 Docker能在所有主要操作系统(包括Linux,Mac OS X和Windows)上运行,因此,使用基于Docker容器的编译方式,用户可在自己熟悉的开发平台上编译Android平台上适用的PaddlePaddle库。 @@ -16,6 +17,12 @@ $ cd Paddle $ docker build -t username/paddle-android:dev . -f Dockerfile.android ``` +用户也可以使用PaddlePaddle提供的官方开发镜像: + +```bash +$ docker pull paddlepaddle/paddle:latest-dev-android +``` + ### 编译PaddlePaddle C-API库 构建好开发镜像后,即可使用开发镜像来编译Android版PaddlePaddle C-API库。 Android的Docker开发镜像向用户提供两个可配置的参数: @@ -41,23 +48,25 @@ Android的Docker开发镜像向用户提供两个可配置的参数: ANDROID_API - >= 21 + >= 16 21 - 编译`armeabi-v7a`,`Android API 21`的PaddlePaddle库 + ```bash $ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=armeabi-v7a" -e "ANDROID_API=21" username/paddle-android:dev ``` - 编译`arm64-v8a`,`Android API 21`的PaddlePaddle库 + ```bash $ docker run -it --rm -v $PWD:/paddle -e "ANDROID_ABI=arm64-v8a" -e "ANDROID_API=21" username/paddle-android:dev ``` -执行上述`docker run`命令时,容器默认执行[paddle/scripts/docker/build_android.sh](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build_android.sh)脚本。该脚本中记录了交叉编译Android版PaddlePaddle库常用的CMake配置,并且会根据`ANDROID_ABI`和`ANDROID_API`自动构建独立工具链、进行编译和安装。由于arm64架构要求Android API不小于21。因此当`ANDROID_ABI=arm64-v8a`,`ANDROID_API<21`时,Docker容器中将默认使用`Android API 21`的编译工具链。用户可以参考下文**配置交叉编译参数**章节,根据个人的需求修改定制Docker容器所执行的脚本。编译安装结束之后,PaddlePaddle的C-API库将被安装到`$PWD/install_android`目录,所依赖的第三方库同时也被安装到`$PWD/install_android/third_party`目录。 +执行上述`docker run`命令时,容器默认执行[paddle/scripts/docker/build_android.sh](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build_android.sh)脚本。该脚本中记录了交叉编译Android版PaddlePaddle库常用的CMake配置,并且会根据`ANDROID_ABI`和`ANDROID_API`自动构建独立工具链、进行编译和安装。由于arm64架构要求Android API不小于21。因此当`ANDROID_ABI=arm64-v8a`,`ANDROID_API<21`时,Docker容器中将默认使用`Android API 21`的编译工具链。用户可以参考下文[配置交叉编译参数](#配置交叉编译参数)章节,根据个人的需求修改定制Docker容器所执行的脚本。编译安装结束之后,PaddlePaddle的C-API库将被安装到`$PWD/install_android`目录,所依赖的第三方库同时也被安装到`$PWD/install_android/third_party`目录。 ## 基于Linux交叉编译环境的编译方式 本文档将以Linux x86-64平台为例,介绍交叉编译Android平台上适用的PaddlePaddle库的方法和步骤。 @@ -83,6 +92,7 @@ your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain 此命令将在`your/path/to/arm_standalone_toolchain`目录生成一套独立编译工具链,面向架构为32位ARM架构,支持的最小的Android API级别为21,支持编译器`arm-linux-androideabi-gcc (GCC) 4.9`和`clang 3.8`。 - 构建`arm64-v8a`、 `Android API 21`的独立工具链: + ```bash your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain.sh \ --arch=arm64 --platform=android-21 --install-dir=your/path/to/arm64_standalone_toolchain @@ -90,14 +100,12 @@ your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain 此命令将在`your/path/to/arm64_standalone_toolchain`目录生成一套独立编译工具链,面向架构为64位ARM64架构,支持的最小Android API级别为21,支持编译器`arm-linux-androideabi-gcc (GCC) 4.9`和`clang 3.8`。 -注意:**PaddlePaddle要求使用的编译工具链所支持的Android API级别不小于21**。 - ### 配置交叉编译参数 CMake系统对交叉编译提供了支持[cmake-toolchains](https://cmake.org/cmake/help/v3.0/manual/cmake-toolchains.7.html#cross-compiling)。为了简化cmake配置,PaddlePaddle为交叉编译提供了工具链配置文档[cmake/cross_compiling/android.cmake](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/android.cmake),以提供一些默认的编译器和编译参数相关配置。注意,从CMake 3.7版本开始,CMake官方对Android平台的交叉编译提供了通用的支持。PaddlePaddle若检测到用户使用的CMake版本不低于3.7时,将会将用户传进来的配置参数传递CMake系统,交由CMake系统本身来处理。有关参数配置的详细说明见[cmake-toolchains](https://cmake.org/cmake/help/v3.7/manual/cmake-toolchains.7.html#cross-compiling)。 交叉编译Android版本的PaddlePaddle库时,有一些必须配置的参数: -- `CMAKE_SYSTEM_NAME`,CMake编译的目标平台,必须设置为`Android`。在设置`CMAKE_SYSTEM_NAME=Android`后,PaddlePaddle的CMake系统才认为是在交叉编译Android系统的版本,并自动编译宿主机版protoc可执行文件、目标机版protobuf库、以及Android所需`arm_soft_fp_abi`分支的目标机版OpenBLAS库。此外,还会强制设置一些PaddlePaddle参数的值(`WITH_GPU=OFF`、`WITH_AVX=OFF`、`WITH_PYTHON=OFF`、`WITH_RDMA=OFF`)。 +- `CMAKE_SYSTEM_NAME`,CMake编译的目标平台,必须设置为`Android`。在设置`CMAKE_SYSTEM_NAME=Android`后,PaddlePaddle的CMake系统才认为是在交叉编译Android系统的版本,并自动编译PaddlePaddle所需的所有第三方库。此外,还会强制设置一些PaddlePaddle参数的值(`WITH_GPU=OFF`、`WITH_AVX=OFF`、`WITH_PYTHON=OFF`、`WITH_RDMA=OFF`、`WITH_MKL=OFF`、`WITH_GOLANG=OFF`)。 - `WITH_C_API`,必须设置为`ON`。在Android平台上只支持使用C-API来预测。 - `WITH_SWIG_PY`,必须设置为`OFF`。在Android平台上不支持通过swig调用来训练或者预测。 @@ -119,7 +127,7 @@ Android平台可选配置参数: 其他配置参数: - `USE_EIGEN_FOR_BLAS`,是否使用Eigen库进行矩阵计算。可设置`ON/OFF`,默认值为`OFF`。 -- `HOST_C/CXX_COMPILER`,宿主机的C/C++编译器。在编译宿主机版protoc可执行文件和目标机版OpenBLAS库时需要用到。默认设置成环境变量`CC`的值;若环境变量`CC`没有设置,则设置成`cc`编译器。 +- `HOST_C/CXX_COMPILER`,宿主机的C/C++编译器。在编译宿主机版protoc可执行文件和目标机版OpenBLAS库时需要用到。默认设置成环境变量`CC/CXX`的值;若环境变量`CC/CXX`没有设置,则设置成`cc/c++`编译器。 常用的cmake配置如下: @@ -147,9 +155,10 @@ cmake -DCMAKE_SYSTEM_NAME=Android \ .. ``` -用户还可根据自己的需求设置其他编译参数。比如希望最小化生成的库的大小,可以设置`CMAKE_BUILD_TYPE`为`MinSizeRel`;若希望最快的执行速度,则可设置`CMAKE_BUILD_TYPE`为`Release`。亦可以通过手动设置`CMAKE_C/CXX_FLAGS_MINSIZEREL/RELEASE`来影响PaddlePaddle的编译过程。 +用户还可根据自己的需求设置其他编译参数。比如希望最小化生成的库的大小,可以设置`CMAKE_BUILD_TYPE`为`MinSizeRel`;若希望最快的执行速度,则可设置`CMAKE_BUILD_TYPE`为`Release`。亦可以通过手动设置`CMAKE_C/CXX_FLAGS`来影响PaddlePaddle的编译过程。 **性能TIPS**,为了达到最快的计算速度,在CMake参数配置上,有以下建议: + - 设置`CMAKE_BUILD_TYPE`为`Release` - 使用`clang`编译工具链 - `armeabi-v7a`时,设置`USE_EIGEN_BLAS=ON`,使用Eigen进行矩阵计算;`arm64-v8a`时,设置`USE_EIGEN_FOR_BLAS=OFF`,使用OpenBLAS进行矩阵计算 diff --git a/doc/mobile/cross_compiling_for_android_en.md b/doc/mobile/cross_compiling_for_android_en.md index 26858581fc1d77a9391520ac0dfd80fbd98f508c..0cf50181df4116beda3aa6faf836eda92edf6066 100644 --- a/doc/mobile/cross_compiling_for_android_en.md +++ b/doc/mobile/cross_compiling_for_android_en.md @@ -1,6 +1,9 @@ # Build PaddlePaddle for Android -There are two approaches to build PaddlePaddle for Android: using Docker and on Linux without Docker. +There are two approaches to build PaddlePaddle for Android: + +- [Cross-Compiling Using Docker](#cross-compiling-using-docker) +- [Cross-Compiling on Linux](#cross-compiling-on-linux) ## Cross-Compiling Using Docker @@ -16,6 +19,12 @@ $ cd Paddle $ docker build -t paddle:dev-android . -f Dockerfile.android ``` +Users can directly use the published Docker image. + +```bash +$ docker pull paddlepaddle/paddle:latest-dev-android +``` + ### Build the Inference Library We can run the Docker image we just created to build the inference library of PaddlePaddle for Android using the command below: @@ -47,7 +56,7 @@ The Docker image accepts two arguments `ANDROID_ABI` and `ANDROID_API`: ANDROID_API - >= 21 + >= 16 21 @@ -93,15 +102,13 @@ Android NDK includes everything we need to build the [*standalone toolchain*](ht The generated standalone toolchain will be in `your/path/to/arm64_standalone_toolchain`. -**Please be aware that the minimum level of Android API required by PaddlePaddle is 21.** - ### Cross-Compiling Arguments CMake supports [choosing the toolchain](https://cmake.org/cmake/help/v3.0/manual/cmake-toolchains.7.html#cross-compiling). PaddlePaddle provides [`android.cmake`](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/android.cmake), which configures the Android cross-compiling toolchain for CMake. `android.cmake` is not required for CMake >= 3.7, which support Android cross-compiling. PaddlePaddle detects the CMake version, for those newer than 3.7, it uses [the official version](https://cmake.org/cmake/help/v3.7/manual/cmake-toolchains.7.html#cross-compiling). Some other CMake arguments you need to know: -- `CMAKE_SYSTEM_NAME` must be `Android`. This tells PaddlePaddle's CMake system to cross-compile third-party dependencies. This also changes some other CMake arguments like `WITH_GPU=OFF`, `WITH_AVX=OFF`, `WITH_PYTHON=OFF`, and `WITH_RDMA=OFF`. +- `CMAKE_SYSTEM_NAME` must be `Android`. This tells PaddlePaddle's CMake system to cross-compile third-party dependencies. This also changes some other CMake arguments like `WITH_GPU=OFF`, `WITH_AVX=OFF`, `WITH_PYTHON=OFF`, `WITH_RDMA=OFF`, `WITH_MKL=OFF` and `WITH_GOLANG=OFF`. - `WITH_C_API` must be `ON`, to build the C-based inference library for Android. - `WITH_SWIG_PY` must be `OFF` because the Android platform doesn't support SWIG-based API. @@ -123,7 +130,7 @@ Some Android-specific arguments: Other useful arguments: - `USE_EIGEN_FOR_BLAS`: indicates if using Eigen. Could be `ON` or `OFF`, defaults to `OFF`. -- `HOST_C/CXX_COMPILER`: specifies the host compiler, which is used to build the host-specific protoc and target-specific OpenBLAS. It defaults to the value of the environment variable `CC`, or `cc`. +- `HOST_C/CXX_COMPILER`: specifies the host compiler, which is used to build the host-specific protoc and target-specific OpenBLAS. It defaults to the value of the environment variable `CC/C++`, or `cc/c++`. Some frequent configurations for your reference: @@ -158,6 +165,7 @@ There are some other arguments you might want to configure. - `CMAKE_BUILD_TYPE-Release` optimizes the runtime performance. Our own tip for performance optimization to use clang and Eigen or OpenBLAS: + - `CMAKE_BUILD_TYPE=Release` - `ANDROID_TOOLCHAIN=clang` - `USE_EIGEN_BLAS=ON` for `armeabi-v7a`, or `USE_EIGEN_FOR_BLAS=OFF` for `arm64-v8a`. diff --git a/doc/mobile/cross_compiling_for_ios_en.md b/doc/mobile/cross_compiling_for_ios_en.md index aa390cd61f3fbd75e5a3b342f3559e76da35a918..19bfe86c511c7e43b462f94c8cabba420b3007f1 100644 --- a/doc/mobile/cross_compiling_for_ios_en.md +++ b/doc/mobile/cross_compiling_for_ios_en.md @@ -1,4 +1,4 @@ -# PaddlePaddle Compiling Guide for iOS +# Build PaddlePaddle for iOS This tutorial will walk you through cross compiling the PaddlePaddle library for iOS from the source in MacOS. @@ -98,7 +98,7 @@ You can set other compiling parameters for your own need. I.E. if you are trying - set `CMAKE_BUILD_TYPE` with `Release` - set `IOS_USE_VECLIB_FOR_BLAS` with `ON` -## Compile and install +## Build and install After CMake, run following commands, PaddlePaddle will download the compile 3rd party dependencies, compile and install PaddlePaddle inference library. @@ -109,7 +109,7 @@ $ make install Please Note: if you compiled PaddlePaddle in the source directory for other platforms, do remove `third_party` and `build` directory within the source with `rm -rf` to ensure that all the 3rd party libraries dependencies and PaddlePaddle is newly compiled with current CMake configuration. -`your/path/to/install` directory will have following directories after `compile` and `install`: +`your/path/to/install` directory will have following directories after `make install`: - `include`, contains all the C-API header files. - `lib`, contains PaddlePaddle C-API static library. diff --git a/go/pserver/client/c/test/test_cclient.c b/go/pserver/client/c/test/test_cclient.c index 89c4d7f00aae2a92ae30ba7b4305550d150dd985..05ec421fff6e1c57b0bace080668d3793f85480f 100644 --- a/go/pserver/client/c/test/test_cclient.c +++ b/go/pserver/client/c/test/test_cclient.c @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include #include diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index 7d2becbdd772747d77890321fce6721d8d17fb30..4a98ede278fad85ff2beef3c8e7dd158912f693a 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -24,6 +24,7 @@ else() add_subdirectory(framework) add_subdirectory(operators) add_subdirectory(pybind) + add_subdirectory(inference) endif() if(WITH_SWIG_PY) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 206e298eb27a2daaec5c674d45cfe4b81a6b522d..3967a40136d6493d533ef9aadd2054cc23592879 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -5,10 +5,18 @@ cc_library(ddim SRCS ddim.cc DEPS eigen3) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) -cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context) +if (WITH_GPU) + nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS ddim place paddle_memory device_context framework_proto) +else() + cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS ddim place paddle_memory device_context framework_proto) +endif () cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) -cc_test(tensor_util_test SRCS tensor_util_test.cc DEPS tensor) +if (WITH_GPU) + nv_test(tensor_util_test SRCS tensor_util_test.cc tensor_util_test.cu DEPS tensor) +else() + cc_test(tensor_util_test SRCS tensor_util_test.cc DEPS tensor) +endif() cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) @@ -18,9 +26,14 @@ nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor) cc_test(variable_test SRCS variable_test.cc) -cc_library(scope SRCS scope.cc DEPS glog) +cc_library(threadpool SRCS threadpool.cc) +cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool) + +cc_library(scope SRCS scope.cc DEPS glog threadpool) cc_test(scope_test SRCS scope_test.cc DEPS scope) +cc_library(data_transform SRCS data_transform.cc DEPS math_function tensor framework_proto) +cc_test(data_transform_test SRCS data_transform_test.cc DEPS data_transform device_context) cc_library(attribute SRCS attribute.cc DEPS framework_proto) cc_test(program_desc_test SRCS program_desc_test.cc DEPS proto_desc @@ -28,13 +41,14 @@ device_context) cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute) cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker) cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto) -cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute) -cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog shape_inference) -cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) +cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context) +cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog + shape_inference data_transform) +cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init) cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog) cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc) -cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) +nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) py_proto_compile(framework_py_proto SRCS framework.proto) # Generate an empty __init__.py to make framework_py_proto as a valid python module. @@ -59,5 +73,7 @@ cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry cc_library(selected_rows SRCS selected_rows.cc DEPS tensor) cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows) -cc_library(init SRCS init.cc DEPS gflags executor place stringpiece) +cc_library(init SRCS init.cc DEPS gflags device_context place stringpiece operator) cc_test(init_test SRCS init_test.cc DEPS init) + +cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index 222aee5974ca82c12a9c3b18f7fac4f96ee251bd..85e693434af863bfc3bde29989dbbfc69678d3b7 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/backward.h" #include "paddle/operators/net_op.h" @@ -427,7 +427,8 @@ std::vector> MakeBlockBackward( VLOG(5) << "Making backward " << (*it)->Type() << " op"; std::vector> op_grads; - if ((*it)->Type() == "recurrent" || (*it)->Type() == "while") { + if ((*it)->Type() == "recurrent" || (*it)->Type() == "while" || + (*it)->Type() == "parallel_do") { int step_block_idx = (*it)->GetBlockAttr("sub_block"); BlockDesc* backward_block = CreateStepBlock(program_desc, no_grad_vars, grad_to_var, step_block_idx); diff --git a/paddle/framework/backward.h b/paddle/framework/backward.h index 2d3b75fe6966cb5dad32dc185a3973e92b23e26e..69ee3802369c16a8b21c0710d2008ef3c085cc5c 100644 --- a/paddle/framework/backward.h +++ b/paddle/framework/backward.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/framework/backward.md b/paddle/framework/backward.md deleted file mode 100644 index ac60be572419b62f4beb644ff192d413c35e19bb..0000000000000000000000000000000000000000 --- a/paddle/framework/backward.md +++ /dev/null @@ -1,100 +0,0 @@ -# Operator/expression 's Backward - -## Motivation - -In Neural Network, most models are solved by the backpropagation algorithm(known as **BP**) at present. Technically, BP calculates the gradient of the loss function, then propagates it back through the networks following the chain rule. Hence we need a module that chains the gradient operators/expressions together to construct the backward pass. Every forward network needs a backward network to construct the full computation graph. The operator/expression's backward pass will be generated with respect to the forward pass. - -## Implementation - -In this design doc, we exported only one API for generating the backward pass. - -```c++ -std::unique_ptr Backward(const OperatorBase& forwardOp, - const std::unordered_set& no_grad_vars); -``` - -The implementation behind it can be divided into two parts, **Backward Operator Creating** and **Backward Operator Building**. - -### Backward Operator Registry - -A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs, and output gradients and then calculate its input gradients. - -| | forward operator | backward operator -| ---------------------- | ---------------- |------------------------- | -| **Operator::inputs_** | Inputs | Inputs, Outputs, OutputGradients | -| **Operator::outputs_** | Outputs | InputGradients | - - In most cases, there is a one-to-one relation between the forward and backward operators. These relations are recorded by a global hash map(`OpInfoMap`). To follow the philosophy of minimum core and to make operators pluggable, the registry mechanism is introduced. - -For example, we have `mul_op`, and we can register its information and corresponding backward operator by the following macro: - -```cpp -REGISTER_OP(mul, MulOp, MulOpMaker, mul_grad, MulOpGrad); -``` - -`mul` is the operator's type. `MulOp` and `MulOpMaker` are the operator class and the operator maker class respectively. - -`mul_grad` is the type of backward operator, and `MulOpGrad` is its class name. - -### Backward Opeartor Creating - -Given a certain forward operator, we can get its corresponding backward operator by calling: - -```cpp -OperatorBase* bwd_op = BuildGradOp(const OperatorBase* fwd_op); -``` - -The function `BuildGradOp` will sequentially execute following processes: - -1. Get the `type_` of given forward operator, and then get the corresponding backward operator's type by looking up the `OpInfoMap`. - -2. Build two maps named `inputs` and `outputs` to temporarily store backward operator's inputs and outputs. Copy forward operator's `inputs_` and `outputs_` to map `inputs`, except these, are not necessary for gradient computing. - -3. Add forward inputs' gradient variables into map `output`, adding forward outputs' gradient variables into map `input`. - -4. Building backward operator with `inputs`, `outputs` and forward operator's attributes. - -### Backward Network Building - -A backward network is a series of backward operators. The main idea of building a backward network is creating backward operators in the inverted sequence and appending them together one by one. There are some corner cases that need special processing. - -1. Op - - When the input forward network is an Op, return its gradient Operator immediately. If all of its outputs are in no gradient set, then return a special `NOP`. - -2. NetOp - - In our design, the network itself is also a kind of operator(**NetOp**). So the operators contained by a big network may be some small network. When the input forward network is a NetOp, it needs to call the sub NetOp/Operators backward function recursively. During the process, we need to collect the `OutputGradients` name according to the forward NetOp. - -3. RnnOp - - RnnOp is a nested stepnet operator. Backward module needs to recusively call `Backward` for every stepnet. - -4. Sharing Variables - - As illustrated in the figure 1 and figure 2, two operators share the same variable name **W@GRAD**, which will overwrite their shared input variable. - -

-
- -​ Figure 1. Sharing variables in operators. - -

- -​ Sharing variable between operators or same input variable used in multiple operators can lead to duplicate gradient variables. As illustrated in figure 2, we need to rename the gradient names recursively and add a generic add operator to prevent overwriting. - -

-
- -​ Figure 2. Replace sharing variable's gradient with `Add` operator. - -

- -​ Because the framework finds variables according to their names, we need to rename the output links. We add an integer suffix to represent its position in the clockwise direction. - -5. Part of the Gradient is Zero. - - In the whole graph, there is some case of that one operator's gradient is not needed, but its input's gradient is a dependency link of other operator, we need to fill a same shape gradient matrix in the position. In our implementation, we insert a special `fillZeroLike` operator. - - -Follow these rules above, then collect the sub graph `OutputGradients`/`InputGradients` as the NetOp's and return it. diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index 0957646b5642cd9afce5d88b2c638679cb01f198..692406b1c37d0c02714eafb5cf9a28329ed873bc 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/backward.h" diff --git a/paddle/framework/data_layout.h b/paddle/framework/data_layout.h index 7429de7ee39297c26360984809e2451100f7b3ff..4a8669c3a41fceaad26878a79eabfd0affce86fd 100644 --- a/paddle/framework/data_layout.h +++ b/paddle/framework/data_layout.h @@ -13,11 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include "paddle/platform/enforce.h" + +#include +#include "paddle/platform/enforce.h" namespace paddle { namespace framework { -enum DataLayout { +enum class DataLayout { kNHWC = 0, kNCHW = 1, kAnyLayout = 2, @@ -33,5 +37,23 @@ inline DataLayout StringToDataLayout(const std::string& str) { } } +inline std::string DataLayoutToString(const DataLayout& data_layout) { + switch (data_layout) { + case DataLayout::kNHWC: + return "NHWC"; + case DataLayout::kNCHW: + return "NCHW"; + case DataLayout::kAnyLayout: + return "ANY_LAYOUT"; + default: + PADDLE_THROW("unknown DataLayou %d", data_layout); + } +} + +inline std::ostream& operator<<(std::ostream& out, DataLayout l) { + out << DataLayoutToString(l); + return out; +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/data_transform.cc b/paddle/framework/data_transform.cc new file mode 100644 index 0000000000000000000000000000000000000000..6b1780968867067fe6d1e7fc576811f0a07340b3 --- /dev/null +++ b/paddle/framework/data_transform.cc @@ -0,0 +1,159 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include + +#include "paddle/framework/data_transform.h" +#include "paddle/framework/lod_tensor.h" +#include "paddle/platform/device_context.h" + +namespace paddle { +namespace framework { + +DataTransformFnMap& DataTransformFnMap::Instance() { + static DataTransformFnMap data_transform_map; + return data_transform_map; +} + +auto KernelFP32 = OpKernelType(proto::DataType::FP32, platform::CPUPlace(), + DataLayout::kNHWC, LibraryType::kPlain); + +auto KernelFP64 = OpKernelType(proto::DataType::FP64, platform::CPUPlace(), + DataLayout::kNHWC, LibraryType::kPlain); + +auto KernelNHWC = OpKernelType(proto::DataType::FP64, platform::CPUPlace(), + DataLayout::kNHWC, LibraryType::kPlain); + +auto KernelNCHW = OpKernelType(proto::DataType::FP64, platform::CPUPlace(), + DataLayout::kNCHW, LibraryType::kPlain); + +// TODO(dzhwinter): Only for testing multiple op kernel. +// Dummy transform function for library_type +// should be removed. +auto KernelPlain = OpKernelType(proto::DataType::FP32, platform::CUDAPlace(0), + DataLayout::kAnyLayout, LibraryType::kPlain); + +auto KernelCUDNN = OpKernelType(proto::DataType::FP32, platform::CUDAPlace(0), + DataLayout::kAnyLayout, LibraryType::kCUDNN); + +void DummyTrans(const platform::DeviceContext* ctx, + const KernelTypePair& kernel_pair, const Variable& in, + Variable* out) { + PADDLE_ENFORCE(in.IsType(), "Only Support Tensor transform!."); + PADDLE_ENFORCE( + platform::places_are_same_class(kernel_pair.first.place_, + kernel_pair.second.place_), + "TransDataType Only Support DataType transform on same place!"); + auto src = in.Get(); + auto* dst = out->GetMutable(); + *dst = src; +} + +void TransDataType(const platform::DeviceContext* ctx, + const KernelTypePair& kernel_pair, const Variable& in, + Variable* out) { + PADDLE_ENFORCE(in.IsType(), "Only Support Tensor transform!."); + PADDLE_ENFORCE( + platform::places_are_same_class(kernel_pair.first.place_, + kernel_pair.second.place_), + "TransDataType Only Support DataType transform on same place!"); + + auto src = in.Get(); + auto* dst = out->GetMutable(); + + auto dims = src.dims(); + dst->Resize(dims); + auto dst_type = kernel_pair.second.data_type_; + auto src_type = kernel_pair.first.data_type_; + + switch (src_type) { + case proto::DataType::FP32: + framework::VisitDataType(dst_type, CastDataType(src, dst, ctx)); + break; + case proto::DataType::FP64: + framework::VisitDataType(dst_type, CastDataType(src, dst, ctx)); + break; + case proto::DataType::INT32: + framework::VisitDataType(dst_type, CastDataType(src, dst, ctx)); + break; + case proto::DataType::INT64: + framework::VisitDataType(dst_type, CastDataType(src, dst, ctx)); + break; + case proto::DataType::BOOL: + framework::VisitDataType(dst_type, CastDataType(src, dst, ctx)); + break; + default: + PADDLE_THROW("Not support type %d", src_type); + } +} + +void TransDataLayout(const std::vector& axis, + const platform::DeviceContext* ctx, + const KernelTypePair& kernel_pair, const Variable& in, + Variable* out) { + PADDLE_ENFORCE(in.IsType(), "Only support Tensor transform!."); + PADDLE_ENFORCE( + platform::places_are_same_class(kernel_pair.first.place_, + kernel_pair.second.place_), + "TransDataLayout only support DataLayout transform on same place!"); + PADDLE_ENFORCE(kernel_pair.first.data_type_ == kernel_pair.second.data_type_, + "TransDataLayout only support Datatype are same!"); + + auto src = in.Get(); + auto* dst = out->GetMutable(); + PADDLE_ENFORCE(arity(src.dims()) == 4, "Input Arity Only Suppport 4!"); + + auto place = kernel_pair.second.place_; + CopyFrom(src, place, *ctx, dst); + + auto src_dim = src.dims(); + std::vector dst_dim; + + dst_dim.resize(axis.size()); + for (size_t i = 0; i < axis.size(); i++) { + dst_dim[i] = src_dim[axis[i]]; + } + + dst->Resize(make_ddim(dst_dim)); + + auto src_type = kernel_pair.first.data_type_; + framework::VisitDataType(src_type, CastDataLayout(ctx, axis, src, dst)); + + dst->set_layout(kernel_pair.second.data_layout_); +} + +} // namespace framework +} // namespace paddle + +namespace f = paddle::framework; + +namespace { +std::vector NHWC2NCHW = {0, 3, 1, 2}; +std::vector NCHW2NHWC = {0, 2, 3, 1}; +} + +REGISTER_DATA_TRANSFORM_FN(f::KernelFP32, f::KernelFP64, f::TransDataType); +REGISTER_DATA_TRANSFORM_FN(f::KernelPlain, f::KernelCUDNN, f::DummyTrans); +REGISTER_DATA_TRANSFORM_FN(f::KernelCUDNN, f::KernelPlain, f::DummyTrans); +REGISTER_DATA_TRANSFORM_FN(f::KernelNHWC, f::KernelNCHW, + std::bind(f::TransDataLayout, NHWC2NCHW, + std::placeholders::_1, + std::placeholders::_2, + std::placeholders::_3, + std::placeholders::_4)); +REGISTER_DATA_TRANSFORM_FN(f::KernelNCHW, f::KernelNHWC, + std::bind(f::TransDataLayout, NCHW2NHWC, + std::placeholders::_1, + std::placeholders::_2, + std::placeholders::_3, + std::placeholders::_4)); diff --git a/paddle/framework/data_transform.h b/paddle/framework/data_transform.h new file mode 100644 index 0000000000000000000000000000000000000000..56ebc80f4386958608213f30e745f2d9528e9e5e --- /dev/null +++ b/paddle/framework/data_transform.h @@ -0,0 +1,173 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include + +#include "paddle/framework/op_kernel_type.h" +#include "paddle/framework/tensor.h" +#include "paddle/framework/variable.h" +#include "paddle/operators/math/math_function.h" +#include "paddle/platform/device_context.h" +#include "paddle/platform/macros.h" +#include "paddle/platform/transform.h" + +namespace paddle { +namespace framework { + +using KernelTypePair = std::pair; + +using DataTransformFn = + std::function; + +struct KernelTypePairHash { + static void HashCombine(const OpKernelType& t, std::size_t* seed) { + OpKernelType::Hash kernel_type_hasher; + (*seed) ^= kernel_type_hasher(t) + 0x9e3779b9 + (*seed << 6) + (*seed >> 2); + } + + size_t operator()(const KernelTypePair& kernel_pair) const { + std::size_t seed = 0; + HashCombine(kernel_pair.first, &seed); + HashCombine(kernel_pair.second, &seed); + return seed; + } +}; + +template +struct CastDataTypeFunctor { + HOSTDEVICE inline OutType operator()(InType in) const { + return static_cast(in); + } +}; + +template +struct CastDataType { + CastDataType(const framework::Tensor& in, framework::Tensor* out, + const platform::DeviceContext* ctx) + : in_(in), out_(out), ctx_(ctx) {} + const framework::Tensor in_; + framework::Tensor* out_; + const platform::DeviceContext* ctx_; + + template + void operator()() { + auto place = ctx_->GetPlace(); + + auto* in_begin = in_.data(); + auto numel = in_.numel(); + auto* in_end = in_begin + numel; + auto* out_begin = out_->mutable_data(place); + + if (platform::is_cpu_place(place)) { + platform::Transform trans; + auto* context = static_cast(ctx_); + trans(*context, in_begin, in_end, out_begin, + CastDataTypeFunctor()); + } else { + // TODO(dzhwinter): enhance CopyFrom CPU<->GPU with different data type? + PADDLE_THROW("Unsupport CPU <-> GPU!"); + } + } +}; + +struct CastDataLayout { + CastDataLayout(const platform::DeviceContext* ctx, + const std::vector& axis, const framework::Tensor& in, + framework::Tensor* out) + : in_(in), out_(out), ctx_(ctx), axis_(axis) {} + const framework::Tensor in_; + framework::Tensor* out_; + const platform::DeviceContext* ctx_; + const std::vector axis_; + + template + void operator()() { + auto place = ctx_->GetPlace(); + + if (platform::is_cpu_place(place)) { + operators::math::Transpose trans4; + auto* context = static_cast(ctx_); + trans4(*context, in_, out_, axis_); + } else { + PADDLE_THROW("Unsupport CPU <-> GPU!"); + } + } +}; + +using DataTransformMap = + std::unordered_map; + +class DataTransformFnMap { + public: + static DataTransformFnMap& Instance(); + + bool Has(const KernelTypePair& key_pair) const { + return map_.find(key_pair) != map_.end(); + } + + void Insert(const OpKernelType& left, const OpKernelType& right, + const DataTransformFn& data_tranform_fn) { + Insert(std::make_pair(left, right), data_tranform_fn); + } + + void Insert(const KernelTypePair& kernel_type_pair, + const DataTransformFn& data_tranform_fn) { + PADDLE_ENFORCE(!Has(kernel_type_pair), + "KernelTypePair %s has been registered", ""); + map_.insert({kernel_type_pair, data_tranform_fn}); + } + + const DataTransformFn& Get(const KernelTypePair& key_pair) const { + auto data_transformer = GetNullable(key_pair); + PADDLE_ENFORCE_NOT_NULL(data_transformer, + "DataTransformFn should not be NULL"); + return *data_transformer; + } + + const DataTransformFn* GetNullable(const KernelTypePair& key_pair) const { + auto it = map_.find(key_pair); + if (it == map_.end()) { + return nullptr; + } else { + return &(it->second); + } + } + + const DataTransformMap& Map() const { return map_; } + + private: + DataTransformFnMap() = default; + DataTransformMap map_; + DISABLE_COPY_AND_ASSIGN(DataTransformFnMap); +}; + +// generate unique name with __LINE__ +// refs https://stackoverflow.com/questions/1597007 +#define TOKENPASTE(x, y) x##y +#define TOKENPASTE2(x, y) TOKENPASTE(x, y) +#define REGISTER_DATA_TRANSFORM_FN(from, to, fn) \ + static int TOKENPASTE2(fn_, __LINE__)() { \ + ::paddle::framework::DataTransformFnMap::Instance().Insert(from, to, fn); \ + return 0; \ + } \ + static int TOKENPASTE2(var_, __LINE__) __attribute__((unused)) = \ + TOKENPASTE2(fn_, __LINE__)() + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/data_transform_test.cc b/paddle/framework/data_transform_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..edd305fd17ae202926b83fbec10089719baa2e16 --- /dev/null +++ b/paddle/framework/data_transform_test.cc @@ -0,0 +1,168 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include +#include + +#include + +#include "paddle/framework/data_transform.h" +#include "paddle/platform/device_context.h" + +namespace paddle { +namespace framework { +using namespace platform; + +/** + * @brief cross validation of different kernel type transform + * We use four bit map represent different combination. + * If the field has multiple possible value, only choose two of them. + * For DataType, only test the FP32(float), FP64(double). + * e.g. 0000 -> FP32, CPUPlace, kNHWC, kPlain + * 1111 -> FP64, GPUPlace, kNCHW, kMKLDNN + */ + +std::array kDataType = { + {proto::DataType::FP32, proto::DataType::FP64}}; + +std::array kPlace = {{CPUPlace(), CUDAPlace(0)}}; + +std::array kDataLayout = {{ + DataLayout::kNHWC, DataLayout::kNCHW, +}}; + +std::array kLibraryType = {{ + LibraryType::kPlain, LibraryType::kMKLDNN, +}}; + +OpKernelType GenFromBit(const std::vector bits) { + return OpKernelType(kDataType[bits[0]], kPlace[bits[1]], kDataLayout[bits[2]], + kLibraryType[bits[3]]); +} + +int test_value = 0; + +auto kernel0 = GenFromBit({0, 0, 0, 0}); +auto kernel1 = GenFromBit({0, 0, 0, 1}); +auto kernel2 = GenFromBit({0, 0, 1, 0}); +auto kernel3 = GenFromBit({0, 0, 1, 1}); + +void TransDataType_t(const platform::DeviceContext* ctx, + const KernelTypePair& p, const Variable& in, + Variable* out) { + test_value++; +} + +void TransDataLayout_t(const platform::DeviceContext* ctx, + const KernelTypePair& p, const Variable& in, + Variable* out) { + test_value--; +} + +void TransLibraryType_t(const platform::DeviceContext* ctx, + const KernelTypePair& p, const Variable& in, + Variable* out) { + test_value += 2; +} + +} // namespace framework +} // namespace paddle + +namespace frw = paddle::framework; + +REGISTER_DATA_TRANSFORM_FN(frw::kernel0, frw::kernel1, frw::TransDataType_t); +REGISTER_DATA_TRANSFORM_FN(frw::kernel1, frw::kernel2, frw::TransDataLayout_t); +REGISTER_DATA_TRANSFORM_FN(frw::kernel0, frw::kernel2, frw::TransLibraryType_t); + +TEST(DataTransform, Register) { + using namespace paddle::framework; + using namespace paddle::platform; + + auto& instance = DataTransformFnMap::Instance(); + paddle::framework::Variable in; + paddle::framework::Variable out; + + DeviceContext* ctx = new CPUDeviceContext(); + auto pair0 = std::make_pair(frw::kernel0, frw::kernel1); + instance.Get(pair0)(ctx, pair0, in, &out); + ASSERT_EQ(test_value, 1); + + auto pair1 = std::make_pair(frw::kernel1, frw::kernel2); + instance.Get(pair1)(ctx, pair1, in, &out); + ASSERT_EQ(test_value, 0); + + auto pair3 = std::make_pair(frw::kernel0, frw::kernel2); + instance.Get(pair3)(ctx, pair3, in, &out); + ASSERT_EQ(test_value, 2); +} + +TEST(DataTransform, DataLayout) { + using namespace paddle::framework; + using namespace paddle::platform; + + auto& instance = DataTransformFnMap::Instance(); + Variable in; + Variable out; + Tensor* src = in.GetMutable(); + src->mutable_data(make_ddim({2, 3, 1, 2}), CPUPlace()); + src->set_layout(DataLayout::kNHWC); + + DeviceContext* ctx = new CPUDeviceContext(); + + { + auto kernel1 = GenFromBit({1, 0, 0, 0}); + auto kernel2 = GenFromBit({1, 0, 1, 0}); + auto pair0 = std::make_pair(kernel1, kernel2); + instance.Get(pair0)(ctx, pair0, in, &out); + } + + Tensor dst = out.Get(); + + EXPECT_TRUE(dst.layout() == DataLayout::kNCHW); + EXPECT_TRUE(dst.dims() == make_ddim({2, 2, 3, 1})); + + { + auto kernel1 = GenFromBit({1, 0, 1, 0}); + auto kernel2 = GenFromBit({1, 0, 0, 0}); + auto pair0 = std::make_pair(kernel1, kernel2); + instance.Get(pair0)(ctx, pair0, out, &in); + } + + EXPECT_TRUE(src->layout() == DataLayout::kNHWC); + EXPECT_TRUE(src->dims() == make_ddim({2, 3, 1, 2})); +} + +TEST(DataTransform, DataType) { + using namespace paddle::framework; + using namespace paddle::platform; + + auto& instance = DataTransformFnMap::Instance(); + DeviceContext* ctx = new CPUDeviceContext(); + + Variable in; + Variable out; + Tensor* src = in.GetMutable(); + float* ptr = src->mutable_data(make_ddim({2, 3}), CPUPlace()); + for (int i = 0; i < 6; ++i) { + ptr[i] = i / 3; + } + + { + auto kernel1 = GenFromBit({0, 0, 0, 0}); + auto kernel2 = GenFromBit({1, 0, 0, 0}); + auto pair0 = std::make_pair(kernel1, kernel2); + instance.Get(pair0)(ctx, pair0, in, &out); + } + Tensor dst = out.Get(); + EXPECT_TRUE(dst.data() != nullptr); +} diff --git a/paddle/framework/data_type.h b/paddle/framework/data_type.h index e94ee2ed52bc40f52caf783f971dd0b560534e08..6a372ac32e48131eed28e2d42125feb5b92a11c7 100644 --- a/paddle/framework/data_type.h +++ b/paddle/framework/data_type.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include diff --git a/paddle/framework/ddim_test.cc b/paddle/framework/ddim_test.cc index bd5ea09d7da700479aa387283d7bde77c64c1293..bc259d1f603fb34ac8546c388669d8c5c1250bd1 100644 --- a/paddle/framework/ddim_test.cc +++ b/paddle/framework/ddim_test.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include #include diff --git a/paddle/framework/details/op_registry.h b/paddle/framework/details/op_registry.h index 7f5151c41d6046f21f7a9707e45de85ec50219ad..6d50e820b2b625f932768d2ca671d999071f1ca6 100644 --- a/paddle/framework/details/op_registry.h +++ b/paddle/framework/details/op_registry.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index 14ae37ec49c12203381e74b3f9174a460e41c18e..bf1f0471ccbfccf13cb6f74c8088da7acd68ec0b 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -14,18 +14,17 @@ limitations under the License. */ #include "paddle/framework/executor.h" -#include -#include -#include #include -#include +#include "gflags/gflags.h" #include "paddle/framework/feed_fetch_type.h" #include "paddle/framework/lod_rank_table.h" -#include "paddle/framework/lod_tensor.h" #include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/op_registry.h" -#include "paddle/framework/scope.h" + +DEFINE_bool(check_nan_inf, false, + "Checking whether operator produce NAN/INF or not. It will be " + "extremely slow so please use this flag wisely."); namespace paddle { namespace framework { @@ -33,13 +32,7 @@ namespace framework { const std::string kFeedOpType = "feed"; const std::string kFetchOpType = "fetch"; -DeviceContextPool* DeviceContextPool::pool = nullptr; - -Executor::Executor(const std::vector& places) { - DeviceContextPool& pool = DeviceContextPool::Get(); - auto borrowed_contexts = pool.Borrow(places); - device_contexts_.swap(borrowed_contexts); -} +Executor::Executor(const platform::Place& place) : place_(place) {} static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) { if (var_type == proto::VarDesc::LOD_TENSOR) { @@ -64,6 +57,19 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) { } } +static void CheckTensorNANOrInf(const std::string& name, + const framework::Tensor& tensor) { + if (tensor.memory_size() == 0) { + return; + } + if (tensor.type().hash_code() != typeid(float).hash_code() && + tensor.type().hash_code() != typeid(double).hash_code()) { + return; + } + PADDLE_ENFORCE(!framework::HasInf(tensor), "Tensor %s has Inf", name); + PADDLE_ENFORCE(!framework::HasNAN(tensor), "Tensor %s has NAN", name); +} + void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, bool create_local_scope, bool create_vars) { // TODO(tonyyang-svail): @@ -71,7 +77,6 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, // - will change to use multiple blocks for RNN op and Cond Op PADDLE_ENFORCE_LT(static_cast(block_id), pdesc.Size()); auto& block = pdesc.Block(block_id); - auto& device = device_contexts_[0]; Scope* local_scope = scope; if (create_vars) { @@ -107,9 +112,18 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, for (auto& op_desc : block.AllOps()) { auto op = paddle::framework::OpRegistry::CreateOp(*op_desc); VLOG(3) << op->DebugString(); - op->Run(*local_scope, *device); + op->Run(*local_scope, place_); + if (FLAGS_check_nan_inf) { + for (auto& vname : op->OutputVars(true)) { + auto* var = local_scope->FindVar(vname); + if (var == nullptr) continue; + if (var->IsType()) { + CheckTensorNANOrInf(vname, var->Get()); + } + } + } } - if (create_local_scope) { + if (create_vars && create_local_scope) { scope->DeleteScope(local_scope); } } diff --git a/paddle/framework/executor.h b/paddle/framework/executor.h index a3d1609293a0d687c33447ca7a0df95c6aac3bc5..d869e18901b82959a40cc296aa0844c20ea63ac1 100644 --- a/paddle/framework/executor.h +++ b/paddle/framework/executor.h @@ -14,9 +14,6 @@ limitations under the License. */ #pragma once -#include -#include - #include "paddle/framework/op_info.h" #include "paddle/framework/program_desc.h" #include "paddle/framework/scope.h" @@ -26,96 +23,13 @@ limitations under the License. */ namespace paddle { namespace framework { -class DeviceContextPool { - public: - static DeviceContextPool& Get() { - PADDLE_ENFORCE_NOT_NULL(pool, "Need to Create DeviceContextPool first!"); - return *pool; - } - - static DeviceContextPool& Create(const std::vector& places) { - if (pool == nullptr) { - pool = new DeviceContextPool(places); - } - return *pool; - } - - const platform::DeviceContext* Borrow(const platform::Place& place) { - auto range = device_contexts_.equal_range(place); - if (range.first == range.second) { - PADDLE_THROW( - "'Place' is not supported, Please re-compile with WITH_GPU " - "option"); - } - return range.first->second; - } - - std::vector Borrow( - const std::vector& places) { - PADDLE_ENFORCE_GT(places.size(), 0); - PADDLE_ENFORCE_LE(places.size(), device_contexts_.size()); - std::vector borrowed_contexts; - for (auto& place : places) { - auto range = device_contexts_.equal_range(place); - if (range.first == range.second) { - PADDLE_THROW( - "'Place' is not supported, Please re-compile with WITH_GPU " - "option"); - } - // TODO(dzhwinter) : assign the first found device. Will enhanced later. - // device load balancer maybe useful here. - borrowed_contexts.emplace_back(range.first->second); - } - return borrowed_contexts; - } - - explicit DeviceContextPool(const std::vector& places) { - PADDLE_ENFORCE_GT(places.size(), 0); - for (size_t i = 0; i < places.size(); i++) { - if (platform::is_cpu_place(places[i])) { - device_contexts_.emplace( - places[i], new platform::CPUDeviceContext( - boost::get(places[i]))); - } else if (platform::is_gpu_place(places[i])) { -#ifdef PADDLE_WITH_CUDA - device_contexts_.emplace( - places[i], new platform::CUDADeviceContext( - boost::get(places[i]))); -#else - PADDLE_THROW( - "'GPUPlace' is not supported, Please re-compile with WITH_GPU " - "option"); -#endif - } - } - } - - ~DeviceContextPool() {} - - private: - static DeviceContextPool* pool; - struct Hash { - std::hash hash_; - size_t operator()(const platform::Place& place) const { - return hash_(place.which()); - } - }; - std::unordered_multimap - device_contexts_; - DISABLE_COPY_AND_ASSIGN(DeviceContextPool); -}; - class Executor { public: // TODO(dzhwinter) : Do not rely on this function, it will be removed explicit Executor(const platform::DeviceContext& device) - : Executor(std::vector({device.GetPlace()})) {} - - explicit Executor(const platform::Place& place) - : Executor(std::vector({place})) {} + : Executor(device.GetPlace()) {} - explicit Executor(const std::vector& places); + explicit Executor(const platform::Place& place); /* @Brief * Runtime evaluation of the given ProgramDesc under certain Scope @@ -128,7 +42,7 @@ class Executor { bool create_vars = true); private: - std::vector device_contexts_; + const platform::Place place_; }; } // namespace framework diff --git a/paddle/framework/feed_fetch_type.h b/paddle/framework/feed_fetch_type.h index bc4ae440fc708f696c18bb9d5ab3ba7dd59e21ab..9bc4a90c44828ecb7458d524f59609f01848cc5c 100644 --- a/paddle/framework/feed_fetch_type.h +++ b/paddle/framework/feed_fetch_type.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include diff --git a/paddle/framework/grad_op_desc_maker.h b/paddle/framework/grad_op_desc_maker.h index cf411fa710103350713342b43946697a8dd2aa46..2de5242831835b47893a5825e5532500ad5ec3f9 100644 --- a/paddle/framework/grad_op_desc_maker.h +++ b/paddle/framework/grad_op_desc_maker.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include diff --git a/paddle/framework/init.cc b/paddle/framework/init.cc index 1c4476f4b30aebf094eb27b45fb435c24a9061c1..7ec8d18b0e886948f4fb951e17875584413771db 100644 --- a/paddle/framework/init.cc +++ b/paddle/framework/init.cc @@ -1,21 +1,22 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include #include -#include "paddle/framework/executor.h" #include "paddle/framework/init.h" +#include "paddle/framework/operator.h" +#include "paddle/platform/device_context.h" #include "paddle/platform/place.h" #include "paddle/string/piece.h" @@ -24,7 +25,6 @@ namespace framework { std::once_flag gflags_init_flag; -// TODO(qijun) move init gflags to init.cc void InitGflags(std::vector &argv) { std::call_once(gflags_init_flag, [&]() { int argc = argv.size(); @@ -48,13 +48,13 @@ bool InitDevices(const std::vector &devices) { std::vector places; for (auto &device : devices) { auto p = string::Piece(device); - if (string::Find(p, ':', 0) == string::Piece::npos) { + if (string::HasPrefix(p, "CPU")) { places.emplace_back(platform::CPUPlace()); } else if (string::HasPrefix(p, "GPU")) { #ifdef PADDLE_WITH_CUDA auto pos = string::RFind(p, ':', string::Piece::npos); auto number = device.substr(pos + 1); - places.emplace_back(platform::GPUPlace(std::stoi(number))); + places.emplace_back(platform::CUDAPlace(std::stoi(number))); #else LOG(WARNING) << "'GPU' is not supported, Please re-compile with WITH_GPU option"; @@ -69,12 +69,17 @@ bool InitDevices(const std::vector &devices) { return platform::is_cpu_place(place); }) == places.end()) { places.emplace_back(platform::CPUPlace()); - LOG(WARNING) << "Not specified any device, use CPU by Default."; + LOG(WARNING) << "Not specified CPU device, create CPU by Default."; } - DeviceContextPool::Create(places); - return true; + platform::DeviceContextPool::Init(places); + framework::UseALL(); return true; } +void InitGLOG(const std::string &prog_name) { + google::InitGoogleLogging(prog_name.c_str()); + google::InstallFailureSignalHandler(); +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/init.h b/paddle/framework/init.h index 1715cd81e6647158e269e39d4d91fbe065cd0008..9c84a03ded52632047841f95badbcf44bc9f48d1 100644 --- a/paddle/framework/init.h +++ b/paddle/framework/init.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include @@ -22,6 +22,8 @@ namespace framework { void InitGflags(std::vector &argv); +void InitGLOG(const std::string &prog_name); + bool InitDevices(const std::vector &devices); } // namespace framework diff --git a/paddle/framework/init_test.cc b/paddle/framework/init_test.cc index f65e881a761e0a546d595eced26dd5b12475a763..f0788051d4855a175d2d7ea1f1a0805c776c462b 100644 --- a/paddle/framework/init_test.cc +++ b/paddle/framework/init_test.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "gtest/gtest.h" #include "paddle/framework/init.h" @@ -23,5 +23,9 @@ TEST(Init, InitDevices) { #ifdef PADDLE_WITH_CUDA std::vector ds2 = {"CPU", "GPU:0", "GPU:1"}; ASSERT_EQ(InitDevices(ds2), true); + + // test re-init + std::vector ds3 = {"GPU:0", "GPU:1"}; + ASSERT_EQ(InitDevices(ds3), true); #endif } diff --git a/paddle/framework/library_type.h b/paddle/framework/library_type.h index 68e9cabb667a5b2421fad8333d8e1be7bfa57002..1e3084835439b0d55de72a669b93acbaef7ed6b9 100644 --- a/paddle/framework/library_type.h +++ b/paddle/framework/library_type.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include namespace paddle { namespace framework { @@ -20,7 +21,51 @@ namespace framework { // For more details about the design of LibraryType, Please refer to // https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/operator_kernel_type.md#library -enum LibraryType { kPlain = 0; kMKLDNN = 1; kCUDNN = 2; } +enum class LibraryType { + kPlain = 0, + kMKLDNN = 1, + kCUDNN = 2, +}; + +inline std::string LibraryTypeToString(const LibraryType& library_type) { + switch (library_type) { + case LibraryType::kPlain: + return "PLAIN"; + case LibraryType::kMKLDNN: + return "MKLDNN"; + case LibraryType::kCUDNN: + return "CUDNN"; + default: + PADDLE_THROW("unknown LibraryType %d", static_cast(library_type)); + } +} + +inline LibraryType StringToLibraryType(const char* ctype) { + std::string s(ctype); + for (size_t i = 0; i < s.size(); ++i) { + s[i] = toupper(s[i]); + } + if (s == std::string("PLAIN")) { + return LibraryType::kPlain; + } else if (s == std::string("MKLDNN")) { + return LibraryType::kMKLDNN; + } else if (s == std::string("CUDNN")) { + return LibraryType::kCUDNN; + // To be compatible with register macro. + // CPU, CUDA, PLAIN are same library type. + } else if (s == std::string("CPU")) { + return LibraryType::kPlain; + } else if (s == std::string("CUDA")) { + return LibraryType::kPlain; + } else { + PADDLE_THROW("Unknown LibraryType %s", s.c_str()); + } +} + +inline std::ostream& operator<<(std::ostream& out, LibraryType l) { + out << LibraryTypeToString(l); + return out; +} } // namespace } // framework diff --git a/paddle/framework/lod_rank_table.cc b/paddle/framework/lod_rank_table.cc index 17d524c09276fc0eb166925bd79bc0bdfcead195..704bce2a0eb60b974efd41a4edda0af2933da825 100644 --- a/paddle/framework/lod_rank_table.cc +++ b/paddle/framework/lod_rank_table.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/lod_rank_table.h" diff --git a/paddle/framework/lod_rank_table.h b/paddle/framework/lod_rank_table.h index d3007d3d7379a59b32465cbd55780c6268e0e4a8..df188709e91871ded0258fa5703ee16a5664f057 100644 --- a/paddle/framework/lod_rank_table.h +++ b/paddle/framework/lod_rank_table.h @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index 465f8c62b5fe2efd549f68bb3a9823d299ba5393..ef85ed69dbe87d4b5b1b1b5d6a04220a5266a635 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/lod_tensor.h" #include "paddle/framework/data_type.h" @@ -43,6 +43,22 @@ std::ostream &operator<<(std::ostream &os, const LoD &lod) { return os; } +std::ostream &operator<<(std::ostream &os, const LoDTensor &t) { + PADDLE_ENFORCE(platform::is_cpu_place(t.place())); + PADDLE_ENFORCE(t.type().hash_code() == typeid(float).hash_code()); + + os << "dim: " << t.dims() << "\n"; + os << "lod: " << t.lod() << "\n"; + + // only print first ten elements + int64_t size = t.numel() < 10 ? t.numel() : 10; + for (int64_t i = 0; i < size; ++i) { + os << t.data()[i] << " "; + } + + return os; +} + LoD SliceLevels(const LoD &in, size_t level_begin, size_t level_end) { LoD new_lod; new_lod.reserve(level_end - level_begin); @@ -177,6 +193,9 @@ void AppendLoD(LoD *lod, const LoD &lod_length) { lod->empty() || lod->size() == lod_length.size(), "The lod_length should has the same size with the appended lod."); if (lod->empty()) { + for (size_t i = 0; i < lod_length.size(); ++i) { + lod->emplace_back(1, 0); // size = 1, value = 0; + } *lod = LoD(lod_length.size(), std::vector({0})); } for (size_t i = 0; i < lod->size(); ++i) { @@ -189,62 +208,16 @@ void AppendLoD(LoD *lod, const LoD &lod_length) { void SerializeToStream(std::ostream &os, const LoDTensor &tensor, const platform::DeviceContext &dev_ctx) { - // TODO(typhoonzero): serialize to ostream - { // the 1st field, uint32_t version + { // the 1st field, uint32_t version for LoDTensor constexpr uint32_t version = 0; os.write(reinterpret_cast(&version), sizeof(version)); } - { // the 2nd field, tensor description - // int32_t size - // void* protobuf message - proto::TensorDesc desc; - desc.set_data_type(framework::ToDataType(tensor.type())); - auto dims = framework::vectorize(tensor.dims()); - auto *pb_dims = desc.mutable_dims(); - pb_dims->Resize(static_cast(dims.size()), 0); - std::copy(dims.begin(), dims.end(), pb_dims->begin()); - int32_t size = desc.ByteSize(); - os.write(reinterpret_cast(&size), sizeof(size)); - auto out = desc.SerializeAsString(); - os.write(out.data(), size); - } - { // the 3rd field, tensor data - uint64_t size = tensor.memory_size(); - auto *data_ptr = tensor.data(); - PADDLE_ENFORCE(size < std::numeric_limits::max(), - "Index overflow when writing tensor"); - if (platform::is_gpu_place(tensor.place())) { -#ifdef PADDLE_WITH_CUDA - constexpr size_t kBufSize = 1024 * 1024 * 64; // 64MB - std::unique_ptr buf(new char[kBufSize]); - auto &gpu_dev_ctx = - static_cast(dev_ctx); - platform::CPUPlace cpu; - uintptr_t data = reinterpret_cast(data_ptr); - while (size != 0) { - size_t size_to_write = std::min(kBufSize, static_cast(size)); - memory::Copy(cpu, buf.get(), - boost::get(tensor.place()), - reinterpret_cast(data), size_to_write, - gpu_dev_ctx.stream()); - gpu_dev_ctx.Wait(); - os.write(buf.get(), size_to_write); - data += size_to_write; - size -= size_to_write; - } -#else - PADDLE_THROW("Unexpected branch"); -#endif - } else { - os.write(static_cast(data_ptr), - static_cast(size)); - } - } - { // the 4th field, lod information - // uint64_t lod_level - // uint64_t lod_level_1 size in byte. - // int* lod_level_1 data - // ... + { + // the 2st field, LoD information + // uint64_t lod_level + // uint64_t lod_level_1 size in byte. + // int* lod_level_1 data + // ... auto lod = tensor.lod(); uint64_t size = lod.size(); os.write(reinterpret_cast(&size), sizeof(size)); @@ -256,49 +229,20 @@ void SerializeToStream(std::ostream &os, const LoDTensor &tensor, static_cast(size)); } } + // the 3st field, Tensor + SerializeToStream(os, static_cast(tensor), dev_ctx); } -void DeserializeFromStream(std::istream &is, LoDTensor *tensor) { - uint32_t version; - is.read(reinterpret_cast(&version), sizeof(version)); - PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); - proto::TensorDesc desc; - { // int32_t size - // proto buffer - int32_t size; - is.read(reinterpret_cast(&size), sizeof(size)); - std::unique_ptr buf(new char[size]); - is.read(reinterpret_cast(buf.get()), size); - PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size), - "Cannot parse tensor desc"); - } - { // read tensor - std::vector dims; - dims.reserve(static_cast(desc.dims().size())); - std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims)); - tensor->Resize(framework::make_ddim(dims)); - - void *buf; - platform::Place cpu = platform::CPUPlace(); - switch (desc.data_type()) { - case proto::FP32: - buf = tensor->mutable_data(cpu); - break; - case proto::FP64: - buf = tensor->mutable_data(cpu); - break; - case proto::INT32: - buf = tensor->mutable_data(cpu); - break; - case proto::INT64: - buf = tensor->mutable_data(cpu); - break; - default: - PADDLE_THROW("DataType %d not supported", desc.data_type()); - } - is.read(static_cast(buf), tensor->memory_size()); +void DeserializeFromStream(std::istream &is, LoDTensor *tensor, + const platform::DeviceContext &dev_ctx) { + { + // the 1st field, unit32_t version for LoDTensor + uint32_t version; + is.read(reinterpret_cast(&version), sizeof(version)); + PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); } - { // read lod + { + // the 2st field, LoD information uint64_t lod_level; is.read(reinterpret_cast(&lod_level), sizeof(lod_level)); auto &lod = *tensor->mutable_lod(); @@ -312,6 +256,72 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor) { lod[i] = tmp; } } + // the 3st filed, Tensor + DeserializeFromStream(is, static_cast(tensor), dev_ctx); +} + +std::vector LoDTensor::SplitLoDTensor( + const std::vector places) const { + check_memory_size(); + // PADDLE_ENFORCE(lod().empty() || (lod().size() == 1 && lod()[0].empty()) + // , "Disable parallel lod for now"); + PADDLE_ENFORCE(lod().empty(), "Disable parallel lod for now"); + PADDLE_ENFORCE(dims()[0] % places.size() == 0, + "Batch size should be divided by places size"); + + std::vector lods; + for (size_t place_idx = 0; place_idx < places.size(); ++place_idx) { + size_t begin = place_idx * dims()[0] / places.size(); + size_t end = (place_idx + 1) * dims()[0] / places.size(); + auto src = Slice(static_cast(begin), static_cast(end)); + + LoDTensor dst; + dst.Resize(src.dims()); + auto &dst_place = places[place_idx]; + auto dst_ptr = dst.mutable_data(dst_place, src.type()); + + // TODO(tonyyang-svail): + // change the following to framework::CopyFrom + auto src_place = src.place(); + auto src_ptr = src.data(); + auto size = src.numel() * SizeOfType(src.type()); + if (platform::is_cpu_place(src_place) && + platform::is_cpu_place(dst_place)) { + memory::Copy(boost::get(dst_place), dst_ptr, + boost::get(src_place), src_ptr, size); + } else { + PADDLE_THROW("Not Implemented"); + } + + lods.emplace_back(dst); + } + + return lods; +} + +void LoDTensor::MergeLoDTensor( + const std::vector &lod_tensors, platform::Place place) { + PADDLE_ENFORCE(platform::is_cpu_place(place)); + PADDLE_ENFORCE(!lod_tensors.empty()); + + framework::DDim new_dim = lod_tensors[0]->dims(); + std::type_index new_type = lod_tensors[0]->type(); + for (auto *lod : lod_tensors) { + PADDLE_ENFORCE(new_dim == lod->dims()); + PADDLE_ENFORCE(new_type == lod->type()); + PADDLE_ENFORCE(platform::is_cpu_place(lod->place())); + } + new_dim[0] *= lod_tensors.size(); + Resize(new_dim); + + auto *dst_ptr = reinterpret_cast(mutable_data(place, new_type)); + for (auto *src : lod_tensors) { + auto size = src->numel() * SizeOfType(src->type()); + memory::Copy(boost::get(place), dst_ptr, + boost::get(src->place()), + src->data(), size); + dst_ptr += size; + } } } // namespace framework diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 0923c52a0ad2fe10cea760df20c99021984ad39d..b27936c1986d13d33786035adeebe14da92b190e 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once @@ -58,6 +58,7 @@ using Vector = thrust::host_vector< using LoD = std::vector>; std::ostream& operator<<(std::ostream& os, const LoD& lod); +std::ostream& operator<<(std::ostream& os, const LoDTensor& t); /* * Slice levels from a LoD. @@ -144,6 +145,12 @@ class LoDTensor : public Tensor { */ void ShrinkInLevel(size_t level, size_t elem_begin, size_t elem_end); + std::vector SplitLoDTensor( + const std::vector places) const; + + void MergeLoDTensor(const std::vector& lod_tensors, + platform::Place place); + private: LoD lod_; }; @@ -208,7 +215,8 @@ void AppendLoD(LoD* lod, const LoD& lod_length); */ void SerializeToStream(std::ostream& os, const LoDTensor& tensor, const platform::DeviceContext& dev_ctx); -void DeserializeFromStream(std::istream& is, LoDTensor* tensor); +void DeserializeFromStream(std::istream& is, LoDTensor* tensor, + const platform::DeviceContext& dev_ctx); } // namespace framework } // namespace paddle diff --git a/paddle/framework/lod_tensor_array.h b/paddle/framework/lod_tensor_array.h index 13f0608d24be97d8bba149b74f1a4deb57deeb48..4a8e7f4fa540b1c2f19a6e3ec236a0dd5c0daf0b 100644 --- a/paddle/framework/lod_tensor_array.h +++ b/paddle/framework/lod_tensor_array.h @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index 02d84b68233f2fdfc66e1df2fc7ce20307cadd94..0868c1f6e695b8de6a755f167951f404de0942ca 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -126,6 +126,20 @@ TEST_F(LoDTensorTester, ShrinkInLevel) { EXPECT_NE(t1.data(), lod_tensor_.data()); } +TEST_F(LoDTensorTester, SerializeAndDeserialize) { + LoDTensor dst_tensor; + platform::CPUDeviceContext cpu_ctx((platform::CPUPlace())); + std::ostringstream oss; + SerializeToStream(oss, lod_tensor_, cpu_ctx); + std::istringstream iss(oss.str()); + DeserializeFromStream(iss, &dst_tensor, cpu_ctx); + float* dst_ptr = dst_tensor.mutable_data(platform::CPUPlace()); + for (int i = 0; i < kLodTensorSize; ++i) { + EXPECT_EQ(dst_ptr[i], i); + } + EXPECT_EQ(dst_tensor.lod(), lod_tensor_.lod()); +} + TEST(LodExpand, test) { LoD lod{{0, 2}}; LoDTensor tensor; diff --git a/paddle/framework/lod_tensor_test.cu b/paddle/framework/lod_tensor_test.cu index 5b90fbfca7f6bec4f2c862d0ff18dfd7cf39e181..e8508ad2658ae850e4c98aa798b5db6d007e67d0 100644 --- a/paddle/framework/lod_tensor_test.cu +++ b/paddle/framework/lod_tensor_test.cu @@ -27,7 +27,7 @@ __global__ void test(size_t* a, int size) { TEST(LoDTensor, LoDInGPU) { paddle::framework::LoDTensor lod_tensor; - paddle::platform::GPUPlace place(0); + paddle::platform::CUDAPlace place(0); paddle::framework::LoD src_lod; src_lod.push_back(std::vector{0, 2, 4, 6, 8, 10, 12, 14}); diff --git a/paddle/framework/op_desc.cc b/paddle/framework/op_desc.cc index b361e64438251c1df827667fb825e7f5909fb09e..e02e572af2c4ee122d033bc7b5231be94026c180 100644 --- a/paddle/framework/op_desc.cc +++ b/paddle/framework/op_desc.cc @@ -64,7 +64,7 @@ class CompileTimeInferShapeContext : public InferShapeContext { PADDLE_ENFORCE_EQ(in_var->GetType(), proto::VarDesc::LOD_TENSOR, "The %d-th output of Output(%s) must be LoDTensor.", j, out); - out_var->SetLoDLevel(in_var->GetLodLevel()); + out_var->SetLoDLevel(in_var->GetLoDLevel()); } bool IsRuntime() const override; @@ -88,6 +88,14 @@ OpDesc::OpDesc(const std::string &type, const VariableNameMap &inputs, need_update_ = true; } +void OpDesc::CopyFrom(const OpDesc &op_desc) { + desc_.set_type(op_desc.Type()); + inputs_ = op_desc.inputs_; + outputs_ = op_desc.outputs_; + attrs_ = op_desc.attrs_; + need_update_ = true; +} + OpDesc::OpDesc(const proto::OpDesc &desc, ProgramDesc *prog) : desc_(desc), need_update_(false) { // restore inputs_ @@ -252,7 +260,13 @@ struct SetAttrDescVisitor : public boost::static_visitor { void operator()(int v) const { attr_->set_i(v); } void operator()(float v) const { attr_->set_f(v); } void operator()(const std::string &v) const { attr_->set_s(v); } - void operator()(bool b) const { attr_->set_b(b); } + + // Please refer to https://github.com/PaddlePaddle/Paddle/issues/7162 + template ::value>::type> + void operator()(T b) const { + attr_->set_b(b); + } void operator()(const std::vector &v) const { VectorToRepeated(v, attr_->mutable_ints()); @@ -266,9 +280,7 @@ struct SetAttrDescVisitor : public boost::static_visitor { void operator()(const std::vector &v) const { VectorToRepeated(v, attr_->mutable_bools()); } - void operator()(proto::BlockDesc *desc) const { - attr_->set_block_idx(desc->idx()); - } + void operator()(BlockDesc *desc) const { attr_->set_block_idx(desc->ID()); } void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); } }; diff --git a/paddle/framework/op_desc.h b/paddle/framework/op_desc.h index 93d4a88f3c390551ab41e42ec2f6f30f52e306db..4cf784a0d0d319d09caa27b4e2b589bd7ac4f324 100644 --- a/paddle/framework/op_desc.h +++ b/paddle/framework/op_desc.h @@ -35,6 +35,8 @@ class OpDesc { OpDesc(const proto::OpDesc &desc, ProgramDesc *prog); + void CopyFrom(const OpDesc &op_desc); + proto::OpDesc *Proto(); std::string Type() const { return desc_.type(); } diff --git a/paddle/framework/op_info.cc b/paddle/framework/op_info.cc index 81ba29797c5f478e5d6a91236f3e8de1e6b43e49..b520108109bb2f72b80f83559fa065a5ca58e9e1 100644 --- a/paddle/framework/op_info.cc +++ b/paddle/framework/op_info.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/op_info.h" diff --git a/paddle/framework/op_info.h b/paddle/framework/op_info.h index 7772d6e745c2207024863d3dd5cbef052358272e..d9b89f9cac9611fcecb18bef87940632df1e2234 100644 --- a/paddle/framework/op_info.h +++ b/paddle/framework/op_info.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include diff --git a/paddle/framework/op_kernel_type.h b/paddle/framework/op_kernel_type.h new file mode 100644 index 0000000000000000000000000000000000000000..053897784c1c4350deadf39e2a009220d38f65f9 --- /dev/null +++ b/paddle/framework/op_kernel_type.h @@ -0,0 +1,89 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/framework/data_layout.h" +#include "paddle/framework/data_type.h" +#include "paddle/framework/library_type.h" +#include "paddle/platform/device_context.h" +#include "paddle/platform/place.h" + +namespace paddle { +namespace framework { + +struct OpKernelType { + struct Hash { + size_t operator()(const OpKernelType& key) const { + int place = key.place_.which(); + int data_type = static_cast(key.data_type_) << LEFT_SHIFT; + int data_layout = static_cast(key.data_layout_) << (LEFT_SHIFT * 2); + int library_type = static_cast(key.library_type_) + << (LEFT_SHIFT * 3); + + std::hash hasher; + return hasher(place + data_type + data_layout + library_type); + } + }; + + // place, data_type, library_type kinds less than 2^8 + constexpr static int LEFT_SHIFT = 8; + + proto::DataType data_type_; + DataLayout data_layout_; + platform::Place place_; + LibraryType library_type_; + + OpKernelType(proto::DataType data_type, platform::Place place, + DataLayout data_layout = DataLayout::kAnyLayout, + LibraryType library_type = LibraryType::kPlain) + : data_type_(data_type), + data_layout_(data_layout), + place_(place), + library_type_(library_type) {} + + OpKernelType(proto::DataType data_type, + const platform::DeviceContext& dev_ctx, + DataLayout data_layout = DataLayout::kAnyLayout, + LibraryType library_type = LibraryType::kPlain) + : data_type_(data_type), + data_layout_(data_layout), + place_(dev_ctx.GetPlace()), + library_type_(library_type) {} + + bool operator==(const OpKernelType& o) const { + return platform::places_are_same_class(place_, o.place_) && + data_type_ == o.data_type_ && data_layout_ == o.data_layout_ && + library_type_ == o.library_type_; + } + + bool operator!=(const OpKernelType& o) const { return !(*this == o); } +}; + +inline std::ostream& operator<<(std::ostream& os, + const OpKernelType& kernel_key) { + os << "data_type[" << kernel_key.data_type_ << "]:data_layout[" + << kernel_key.data_layout_ << "]:place[" << kernel_key.place_ + << "]:library_type[" << kernel_key.library_type_ << "]"; + return os; +} + +inline std::string KernelTypeToString(const OpKernelType& kernel_key) { + std::ostringstream stream; + stream << kernel_key; + return stream.str(); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/op_kernel_type_test.cc b/paddle/framework/op_kernel_type_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..649afeee8a846b0579545f2edff77e9dbe3b4dd8 --- /dev/null +++ b/paddle/framework/op_kernel_type_test.cc @@ -0,0 +1,49 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/framework/op_kernel_type.h" +#include +#include + +TEST(OpKernelType, ToString) { + using OpKernelType = paddle::framework::OpKernelType; + using DataType = paddle::framework::proto::DataType; + using CPUPlace = paddle::platform::CPUPlace; + using DataLayout = paddle::framework::DataLayout; + using LibraryType = paddle::framework::LibraryType; + + OpKernelType op_kernel_type(DataType::FP32, CPUPlace(), DataLayout::kNCHW, + LibraryType::kCUDNN); + + ASSERT_EQ( + paddle::framework::KernelTypeToString(op_kernel_type), + "data_type[5]:data_layout[NCHW]:place[CPUPlace]:library_type[CUDNN]"); +} + +TEST(OpKernelType, Hash) { + using OpKernelType = paddle::framework::OpKernelType; + using DataType = paddle::framework::proto::DataType; + using CPUPlace = paddle::platform::CPUPlace; + using CUDAPlace = paddle::platform::CUDAPlace; + using DataLayout = paddle::framework::DataLayout; + using LibraryType = paddle::framework::LibraryType; + + OpKernelType op_kernel_type_1(DataType::FP32, CPUPlace(), DataLayout::kNCHW, + LibraryType::kCUDNN); + OpKernelType op_kernel_type_2(DataType::FP32, CUDAPlace(0), DataLayout::kNCHW, + LibraryType::kCUDNN); + + OpKernelType::Hash hasher; + ASSERT_NE(hasher(op_kernel_type_1), hasher(op_kernel_type_2)); +} diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 7f0155b61f44b676825b84667d5bebb798cae8a3..d75c0233e8e0134ddf4edc50c07490a234b65cd0 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -37,8 +37,8 @@ class Registrar { public: // In our design, various kinds of classes, e.g., operators and kernels, // have their corresponding registry and registrar. The action of - // registration is in the constructor of a global registrar variable, which, - // however, are not used in the code that calls package framework, and would + // registration is in the constructor of a global registrar variable, which + // are not used in the code that calls package framework, and would // be removed from the generated binary file by the linker. To avoid such // removal, we add Touch to all registrar classes and make USE_OP macros to // call this method. So, as long as the callee code calls USE_OP, the global @@ -61,17 +61,6 @@ struct OperatorRegistrar : public Registrar { class OpRegistry { public: - template - static void RegisterOp(const std::string& op_type, - const std::string& grad_op_type) { - OperatorRegistrar reg(op_type.c_str()); - reg.info.grad_op_type_ = grad_op_type; - // register gradient op - if (!grad_op_type.empty()) { - OperatorRegistrar grad_reg(grad_op_type.c_str()); - } - } - static std::unique_ptr CreateOp(const std::string& type, const VariableNameMap& inputs, const VariableNameMap& outputs, @@ -90,30 +79,31 @@ struct OpKernelRegistrarFunctor { using KERNEL_TYPE = typename std::tuple_element>::type; - void operator()(const char* op_type) const { + void operator()(const char* op_type, const char* library_type) const { using T = typename KERNEL_TYPE::ELEMENT_TYPE; - OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType()); + OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType(), + DataLayout::kAnyLayout, StringToLibraryType(library_type)); OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KERNEL_TYPE); constexpr auto size = std::tuple_size>::value; OpKernelRegistrarFunctor func; - func(op_type); + func(op_type, library_type); } }; template struct OpKernelRegistrarFunctor { - void operator()(const char* op_type) const {} + void operator()(const char* op_type, const char* library_type) const {} }; // User can register many kernel in one place. The data type could be different. template class OpKernelRegistrar : public Registrar { public: - explicit OpKernelRegistrar(const char* op_type) { + explicit OpKernelRegistrar(const char* op_type, const char* library_type) { OpKernelRegistrarFunctor func; - func(op_type); + func(op_type, library_type); } }; @@ -192,14 +182,15 @@ class OpKernelRegistrar : public Registrar { __reg_op_kernel_##op_type##_##DEVICE_TYPE##__, \ "REGISTER_OP_KERNEL must be called in global namespace"); \ static ::paddle::framework::OpKernelRegistrar \ - __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type); \ + __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type, \ + #DEVICE_TYPE); \ int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() { \ __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__.Touch(); \ return 0; \ } #define REGISTER_OP_CUDA_KERNEL(op_type, ...) \ - REGISTER_OP_KERNEL(op_type, CUDA, ::paddle::platform::GPUPlace, __VA_ARGS__) + REGISTER_OP_KERNEL(op_type, CUDA, ::paddle::platform::CUDAPlace, __VA_ARGS__) #define REGISTER_OP_CPU_KERNEL(op_type, ...) \ REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__) diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc index 27713e5cbffe95e0ae31ac94a70c64deb53c4ffb..a286925bbe4cc455a5956b4ac1800a2bafa3bfdb 100644 --- a/paddle/framework/op_registry_test.cc +++ b/paddle/framework/op_registry_test.cc @@ -1,15 +1,31 @@ -#include "paddle/framework/op_registry.h" +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include #include +#include "paddle/framework/op_registry.h" + namespace pd = paddle::framework; namespace paddle { namespace framework { + class CosineOp : public OperatorBase { public: using OperatorBase::OperatorBase; - void Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const override {} + void Run(const Scope& scope, const platform::Place& place) const override {} }; class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { @@ -28,8 +44,7 @@ class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { class MyTestOp : public OperatorBase { public: using OperatorBase::OperatorBase; - void Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const override {} + void Run(const Scope& scope, const platform::Place& place) const override {} }; class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { @@ -76,8 +91,8 @@ TEST(OpRegistry, CreateOp) { auto op = paddle::framework::OpRegistry::CreateOp(op_desc); paddle::framework::Scope scope; - paddle::platform::CPUDeviceContext dev_ctx; - op->Run(scope, dev_ctx); + paddle::platform::CPUPlace cpu_place; + op->Run(scope, cpu_place); float scale_get = op->Attr("scale"); ASSERT_EQ(scale_get, scale); } @@ -117,8 +132,8 @@ TEST(OpRegistry, DefaultValue) { auto op = paddle::framework::OpRegistry::CreateOp(op_desc); paddle::framework::Scope scope; - paddle::platform::CPUDeviceContext dev_ctx; - op->Run(scope, dev_ctx); + paddle::platform::CPUPlace cpu_place; + op->Run(scope, cpu_place); ASSERT_EQ(op->Attr("scale"), 1.0); } @@ -167,9 +182,9 @@ TEST(OpRegistry, CustomChecker) { attr->set_type(paddle::framework::proto::AttrType::INT); attr->set_i(4); auto op = paddle::framework::OpRegistry::CreateOp(op_desc); - paddle::platform::CPUDeviceContext dev_ctx; + paddle::platform::CPUPlace cpu_place; paddle::framework::Scope scope; - op->Run(scope, dev_ctx); + op->Run(scope, cpu_place); int test_attr = op->Attr("test_attr"); ASSERT_EQ(test_attr, 4); } @@ -184,3 +199,197 @@ TEST(OperatorRegistrar, Test) { using namespace paddle::framework; OperatorRegistrar reg("cos"); } + +namespace paddle { +namespace framework { + +class OpKernelTestMaker : public OpProtoAndCheckerMaker { + public: + OpKernelTestMaker(OpProto* proto, OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddComment("NoGradOp, same input output. no Grad"); + } +}; + +class OpWithKernelTest : public OperatorWithKernel { + public: + using OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(InferShapeContext* ctx) const override {} + + framework::OpKernelType GetActualKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(proto::DataType::FP32, ctx.device_context()); + } +}; + +template +class OpKernelTest : public paddle::framework::OpKernel { + public: + void Compute(const paddle::framework::ExecutionContext& ctx) const {} +}; + +} // namespace framework +} // namespace paddle + +REGISTER_OP_WITHOUT_GRADIENT(op_with_kernel, + paddle::framework::OpWithKernelTest, + paddle::framework::OpKernelTestMaker); +REGISTER_OP_CPU_KERNEL( + op_with_kernel, + paddle::framework::OpKernelTest); + +REGISTER_OP_CUDA_KERNEL(op_with_kernel, + paddle::framework::OpKernelTest< + paddle::platform::CUDADeviceContext, float>); + +TEST(OperatorRegistrar, CPU) { + paddle::framework::proto::OpDesc op_desc; + paddle::platform::CPUPlace cpu_place; + paddle::framework::Scope scope; + + op_desc.set_type("op_with_kernel"); + auto op = paddle::framework::OpRegistry::CreateOp(op_desc); + + op->Run(scope, cpu_place); +} + +TEST(OperatorRegistrar, CUDA) { + paddle::framework::proto::OpDesc op_desc; + paddle::platform::CUDAPlace cuda_place(0); + paddle::framework::Scope scope; + + op_desc.set_type("op_with_kernel"); + auto op = paddle::framework::OpRegistry::CreateOp(op_desc); + + op->Run(scope, cuda_place); +} + +static int op_test_value = 0; + +using paddle::platform::DeviceContext; +using paddle::platform::CPUDeviceContext; +using paddle::platform::CUDADeviceContext; + +namespace paddle { +namespace framework { + +class OpWithMultiKernelTest : public OperatorWithKernel { + public: + using OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(InferShapeContext* ctx) const override {} + + framework::OpKernelType GetActualKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(proto::DataType::FP32, ctx.device_context()); + } + + framework::OpKernelType GetExpectedKernelType( + const framework::OpKernelType& kernel) const override { + return framework::OpKernelType(kernel.data_type_, platform::CUDAPlace(0), + kernel.data_layout_, + framework::LibraryType::kCUDNN); + } +}; + +template +class OpMultiKernelTest : public paddle::framework::OpKernel { + public: + void Compute(const paddle::framework::ExecutionContext& ctx) const; +}; + +template +class OpMultiKernelTest + : public paddle::framework::OpKernel { + public: + void Compute(const paddle::framework::ExecutionContext& ctx) const { + ++op_test_value; + } +}; + +template +class OpMultiKernelTest + : public paddle::framework::OpKernel { + public: + void Compute(const paddle::framework::ExecutionContext& ctx) const { + --op_test_value; + } +}; + +template +class OpMultiKernelTest2 : public paddle::framework::OpKernel { + public: + void Compute(const paddle::framework::ExecutionContext& ctx) const; +}; + +template +class OpMultiKernelTest2 + : public paddle::framework::OpKernel { + public: + void Compute(const paddle::framework::ExecutionContext& ctx) const { + op_test_value += 10; + } +}; + +template +class OpMultiKernelTest2 + : public paddle::framework::OpKernel { + public: + void Compute(const paddle::framework::ExecutionContext& ctx) const { + op_test_value -= 10; + } +}; + +} // namespace framework +} // namespace paddle + +REGISTER_OP_WITHOUT_GRADIENT(op_with_multi_kernel, + paddle::framework::OpWithMultiKernelTest, + paddle::framework::OpKernelTestMaker); +REGISTER_OP_KERNEL( + op_with_multi_kernel, CPU, paddle::platform::CPUPlace, + paddle::framework::OpMultiKernelTest); +REGISTER_OP_KERNEL( + op_with_multi_kernel, MKLDNN, paddle::platform::CPUPlace, + paddle::framework::OpMultiKernelTest2); +REGISTER_OP_KERNEL( + op_with_multi_kernel, CUDA, paddle::platform::CUDAPlace, + paddle::framework::OpMultiKernelTest); +REGISTER_OP_KERNEL( + op_with_multi_kernel, CUDNN, paddle::platform::CUDAPlace, + paddle::framework::OpMultiKernelTest2); + +TEST(OperatorRegistrar, OpWithMultiKernel) { + paddle::framework::proto::OpDesc op_desc; + paddle::platform::CUDAPlace cuda_place(0); + paddle::platform::CPUPlace cpu_place; + paddle::framework::Scope scope; + + op_desc.set_type("op_with_multi_kernel"); + auto op = paddle::framework::OpRegistry::CreateOp(op_desc); + + // use all available kernels + paddle::framework::UseALL(); + op->Run(scope, cuda_place); + EXPECT_EQ(op_test_value, -10); + + // remove cuda kernels + paddle::framework::UseCPU(); + op->Run(scope, cpu_place); + + EXPECT_EQ(op_test_value, -9); + + // add cuda kernels + paddle::framework::UseCUDA(); + op->Run(scope, cuda_place); + + EXPECT_EQ(op_test_value, -10); + + // use cudnn kernel + paddle::framework::UseCUDNN(); + op->Run(scope, cuda_place); + EXPECT_EQ(op_test_value, -20); +} diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 0e58c0b5707516bd1274181df568d08ff504c152..b9dcf16da5d5e1d10176b6c9ae6ea1be080064ae 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -11,17 +11,68 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include -#include "paddle/framework/operator.h" #include #include + +#include "paddle/framework/data_transform.h" +#include "paddle/framework/executor.h" #include "paddle/framework/lod_tensor_array.h" +#include "paddle/framework/operator.h" #include "paddle/framework/shape_inference.h" #include "paddle/framework/var_type.h" namespace paddle { namespace framework { +std::vector> kKernelPriority; + +void UseCPU() { + kKernelPriority.clear(); + /*Plain CPU*/ + auto pair0 = std::make_tuple(platform::CPUPlace(), LibraryType::kPlain); + kKernelPriority.insert(kKernelPriority.begin(), pair0); +} + +void UseMKLDNN() { + UseCPU(); +#if PADDLE_WITH_MKLML + { + /*MKLDNN Kernel*/ + auto pair0 = std::make_tuple(platform::CPUPlace(), LibraryType::kMKLDNN); + kKernelPriority.insert(kKernelPriority.begin(), pair0); + } +#endif +} + +void UseCUDA() { + UseMKLDNN(); +#if PADDLE_WITH_CUDA + /*Plain GPU*/ + auto pair0 = std::make_tuple(platform::CUDAPlace(0), LibraryType::kPlain); + kKernelPriority.insert(kKernelPriority.begin(), pair0); +#endif +} + +void UseCUDNN() { + UseCUDA(); +#if PADDLE_WITH_CUDA + if (platform::dynload::HasCUDNN()) { + /*CUDNN Kernel*/ + auto pair0 = std::make_tuple(platform::CUDAPlace(0), LibraryType::kCUDNN); + kKernelPriority.insert(kKernelPriority.begin(), pair0); + } +#endif +} + +void UseALL() { + UseCPU(); + UseMKLDNN(); + UseCUDA(); + UseCUDNN(); +} + std::string OperatorBase::Input(const std::string& name) const { auto& ins = Inputs(name); PADDLE_ENFORCE_LE(ins.size(), 1UL, @@ -182,7 +233,8 @@ static const Tensor* GetTensorFromVar(const Variable* var) { } else if (var->IsType()) { t = &(var->Get().value()); } else { - PADDLE_THROW("Variable type must be LoDTensor/SelectedRows."); + PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.", + var->Type().name()); } return t; } @@ -194,7 +246,8 @@ static Tensor* GetMutableTensorFromVar(Variable* var) { } else if (var->IsType()) { t = var->GetMutable()->mutable_value(); } else { - PADDLE_THROW("Variable type must be LoDTensor/SelectedRows."); + PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.", + var->Type().name()); } return t; } @@ -240,12 +293,6 @@ std::vector ExecutionContext::MultiOutput( return res; } -std::ostream& operator<<(std::ostream& os, const OpKernelType& kernel_key) { - os << "place[" << kernel_key.place_ << "]:data_type[" << kernel_key.data_type_ - << "]"; - return os; -} - bool OpSupportGPU(const std::string& op_type) { auto& all_kernels = OperatorWithKernel::AllOpKernels(); auto it = all_kernels.find(op_type); @@ -362,7 +409,8 @@ class RuntimeInferShapeContext : public InferShapeContext { } else if (var->IsType()) { return var->Get().GetCompleteDims(); } else { - PADDLE_THROW("Variable type must be LoDTensor/SelectedRows."); + PADDLE_THROW("Variable %s type_id %s, expect LoDTensor/SelectedRows.", + name, var->Type().name()); } } @@ -373,7 +421,8 @@ class RuntimeInferShapeContext : public InferShapeContext { } else if (var->IsType()) { var->GetMutable()->set_height(dim[0]); } else { - PADDLE_THROW("Variable type must be LoDTensor/SelectedRows."); + PADDLE_THROW("Variable %s type_id %s, expect LoDTensor/SelectedRows.", + name, var->Type().name()); } } @@ -387,12 +436,36 @@ class RuntimeInferShapeContext : public InferShapeContext { const Scope& scope_; }; +const platform::DeviceContext* GetDeviceContext( + framework::KernelTypePair& kernel_pair) { + auto& actual_kernel_key = kernel_pair.first; + auto& expected_kernel_key = kernel_pair.second; + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + + if (platform::is_gpu_place(actual_kernel_key.place_) && + platform::is_cpu_place(expected_kernel_key.place_)) { + return pool.Get(actual_kernel_key.place_); + } else if (platform::is_cpu_place(actual_kernel_key.place_) && + platform::is_gpu_place(expected_kernel_key.place_)) { + return pool.Get(expected_kernel_key.place_); + } else { + PADDLE_THROW( + "Currently, model parallelism is only supported between CPU and CUDA"); + } +} + +const platform::DeviceContext* GetDeviceContext( + const framework::OpKernelType& kernel) { + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + return pool.Get(kernel.place_); +} + void OperatorWithKernel::Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const { + const platform::Place& place) const { RuntimeInferShapeContext infer_shape_ctx(*this, scope); this->InferShape(&infer_shape_ctx); - - ExecutionContext ctx(*this, scope, dev_ctx); + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto dev_ctx = pool.Get(place); // check if op[type] has kernel registered. auto& all_op_kernels = AllOpKernels(); @@ -404,19 +477,94 @@ void OperatorWithKernel::Run(const Scope& scope, // check if op[type] have kernel for kernel_key OpKernelMap& kernels = kernels_iter->second; - auto kernel_key = GetKernelType(ctx); - auto kernel_iter = kernels.find(kernel_key); + + ExecutionContext ctx(*this, scope, *dev_ctx); + auto actual_kernel_key = GetActualKernelType(ctx); + + auto expected_kernel_key = GetExpectedKernelType(actual_kernel_key); + + if (actual_kernel_key == expected_kernel_key) { + PADDLE_ENFORCE_EQ(actual_kernel_key.place_, expected_kernel_key.place_, + "Currently, model parallelism is only supported between " + "CPU and other devices. For example, multi-GPU model " + "parallelism will failed."); + } else { + // find the best key candidate + const DataTransformFnMap& trans_map = DataTransformFnMap::Instance(); + for (auto& candidate : kKernelPriority) { + auto candidate_key = + OpKernelType(actual_kernel_key.data_type_, std::get<0>(candidate), + actual_kernel_key.data_layout_, std::get<1>(candidate)); + + auto candidate_pair = std::make_pair(actual_kernel_key, candidate_key); + if ((actual_kernel_key == candidate_key) || + (kernels.count(candidate_key) && + trans_map.GetNullable(candidate_pair))) { + expected_kernel_key = candidate_key; + break; + } + } + + auto kernel_pair = std::make_pair(actual_kernel_key, expected_kernel_key); + const DataTransformFn* trans_fun = trans_map.GetNullable(kernel_pair); + if (trans_fun) { + auto input_vars = this->InputVars(); + // TODO(qijun) filter the input vars that do not need to be transformed + + // filter vars that has been transformed + std::vector need_trans; + for (auto var_name : input_vars) { + auto var_name_trans = + var_name + framework::KernelTypeToString(expected_kernel_key); + if (!scope.FindVar(var_name_trans)) { + const_cast(scope).Var(var_name_trans); + need_trans.push_back(var_name); + } + } + + if (!need_trans.empty()) { + auto trans_dev_ctx = GetDeviceContext(kernel_pair); + + // Wait for transform starting + dev_ctx->Wait(); + + for (auto var_name : need_trans) { + (*trans_fun)(trans_dev_ctx, kernel_pair, *(scope.FindVar(var_name)), + scope.FindVar(var_name + framework::KernelTypeToString( + expected_kernel_key))); + } + // Wait for data transform finishing + trans_dev_ctx->Wait(); + } + } + } + + VLOG(10) << "Actual kernel: " << actual_kernel_key + << "Expected kernel: " << expected_kernel_key; + + auto kernel_iter = kernels.find(expected_kernel_key); if (kernel_iter == kernels.end()) { - PADDLE_THROW("The operator %s does not support %s", type_, kernel_key); + PADDLE_THROW("The operator %s does not support %s", type_, + expected_kernel_key); } - kernel_iter->second->Compute(ctx); + auto* expected_dev_ctx = GetDeviceContext(expected_kernel_key); + ExecutionContext expected_ctx(*this, scope, *expected_dev_ctx); + + kernel_iter->second->Compute(expected_ctx); } -OpKernelType OperatorWithKernel::GetKernelType( + +OpKernelType OperatorWithKernel::GetActualKernelType( const ExecutionContext& ctx) const { return OpKernelType(IndicateDataType(ctx), ctx.GetPlace()); } + +OpKernelType OperatorWithKernel::GetExpectedKernelType( + const OpKernelType& actual_kernel_type) const { + return actual_kernel_type; +} + proto::DataType OperatorWithKernel::IndicateDataType( const ExecutionContext& ctx) const { auto& scope = ctx.scope(); diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 3207360cbaca4e3b96dfe933c67aaa70c59a6044..1f5a4af58c5a9ad2fa8f4ac08ece67084b8f741a 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -17,21 +17,21 @@ limitations under the License. */ #include #include #include +#include #include #include #include "glog/logging.h" // For VLOG #include "paddle/framework/attribute.h" #include "paddle/framework/block_desc.h" -#include "paddle/framework/data_type.h" #include "paddle/framework/framework.pb.h" #include "paddle/framework/lod_tensor.h" #include "paddle/framework/op_info.h" +#include "paddle/framework/op_kernel_type.h" #include "paddle/framework/scope.h" #include "paddle/framework/selected_rows.h" #include "paddle/framework/tensor.h" #include "paddle/platform/device_context.h" -#include "paddle/platform/place.h" #include "paddle/platform/variant.h" #include "paddle/utils/Error.h" @@ -53,6 +53,34 @@ constexpr char kGradVarSuffix[] = "@GRAD"; /// Variables with this suffix are supposed to be filled up with zeros. constexpr char kZeroVarSuffix[] = "@ZERO"; +// define some kernel priority +extern std::vector> kKernelPriority; + +/** + * @brief Use cpu kernel only + */ +void UseCPU(); + +/** + * @brief Perfer MKLDNN kernel than Plain CPU kernel + */ +void UseMKLDNN(); + +/** + * @brief Perfer CUDA kernel than Plain CPU kernel + */ +void UseCUDA(); + +/** + * @brief Perfer cudnn kernel than Plain CUDA kernel + */ +void UseCUDNN(); + +/** + * @brief Use all available kernels + */ +void UseALL(); + inline std::string GradVarName(const std::string& var_name) { return var_name + kGradVarSuffix; } @@ -83,8 +111,10 @@ class OperatorBase { virtual std::string DebugString() const; /// Net will call this function to Run an op. - virtual void Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const = 0; + virtual void Run(const Scope& scope, const platform::Place& place) const = 0; + + // FIXME(typhoonzero): this is only used for recv_op to stop event_loop. + virtual void Stop() {} virtual bool IsNetOp() const { return false; } @@ -159,8 +189,7 @@ class OperatorBase { class NOP : public OperatorBase { public: using OperatorBase::OperatorBase; - void Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const override {} + void Run(const Scope& scope, const platform::Place& place) const override {} std::unique_ptr Clone() const override { return std::unique_ptr(new NOP(*this)); } @@ -345,34 +374,6 @@ class OpKernel : public OpKernelBase { using ELEMENT_TYPE = T; }; -struct OpKernelType { - struct Hash { - std::hash hash_; - size_t operator()(const OpKernelType& key) const { - int place = key.place_.which(); - int data_type = static_cast(key.data_type_); - int pre_hash = data_type << NUM_PLACE_TYPE_LIMIT_IN_BIT | - (place & ((1 << NUM_PLACE_TYPE_LIMIT_IN_BIT) - 1)); - return hash_(pre_hash); - } - }; - - platform::Place place_; - proto::DataType data_type_; - - OpKernelType(proto::DataType data_type, platform::Place place) - : place_(place), data_type_(data_type) {} - - OpKernelType(proto::DataType data_type, - const platform::DeviceContext& dev_ctx) - : place_(dev_ctx.GetPlace()), data_type_(data_type) {} - - bool operator==(const OpKernelType& o) const { - return platform::places_are_same_class(place_, o.place_) && - data_type_ == o.data_type_; - } -}; - class OperatorWithKernel : public OperatorBase { public: using OpKernelMap = @@ -383,8 +384,7 @@ class OperatorWithKernel : public OperatorBase { const VariableNameMap& outputs, const AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const final; + void Run(const Scope& scope, const platform::Place& place) const final; static std::unordered_map& AllOpKernels() { @@ -405,7 +405,9 @@ class OperatorWithKernel : public OperatorBase { } protected: - virtual OpKernelType GetKernelType(const ExecutionContext& ctx) const; + virtual OpKernelType GetActualKernelType(const ExecutionContext& ctx) const; + virtual OpKernelType GetExpectedKernelType( + const OpKernelType& actual_kernel_type) const; private: // indicate kernel DataType by input data. Defaultly all input data must be @@ -413,8 +415,6 @@ class OperatorWithKernel : public OperatorBase { proto::DataType IndicateDataType(const ExecutionContext& ctx) const; }; -std::ostream& operator<<(std::ostream& os, const OpKernelType& kernel_key); - extern bool OpSupportGPU(const std::string& op_type); } // namespace framework diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc index 05a465152204c8e9f9dbd75d0bfb21ea44d25cf1..4d38a7ada91af834aa1a19b49e36d606ebe786ba 100644 --- a/paddle/framework/operator_test.cc +++ b/paddle/framework/operator_test.cc @@ -11,11 +11,12 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#include "paddle/framework/operator.h" #include "gtest/gtest.h" + +#include "paddle/framework/init.h" #include "paddle/framework/op_info.h" #include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" namespace paddle { namespace framework { @@ -27,8 +28,7 @@ class OpWithoutKernelTest : public OperatorBase { OpWithoutKernelTest(const std::string& type, const VariableNameMap& inputs, const VariableNameMap& outputs, const AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs), x(1) {} - void Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const override { + void Run(const Scope& scope, const platform::Place& place) const override { ++op_run_num; ASSERT_EQ(static_cast(inputs_.size()), 1); ASSERT_EQ(static_cast(outputs_.size()), 1); @@ -41,10 +41,9 @@ class OpWithoutKernelTest : public OperatorBase { int x{0}; }; -class OpeWithoutKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker { +class OpWithoutKernelCheckerMaker : public OpProtoAndCheckerMaker { public: - OpeWithoutKernelTestProtoAndCheckerMaker(OpProto* proto, - OpAttrChecker* op_checker) + OpWithoutKernelCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("input", "input of test op"); AddOutput("output", "output of test op"); @@ -65,11 +64,12 @@ static void BuildVar(const std::string& param_name, } } -REGISTER_OP_WITHOUT_GRADIENT( - test_operator, paddle::framework::OpWithoutKernelTest, - paddle::framework::OpeWithoutKernelTestProtoAndCheckerMaker); +REGISTER_OP_WITHOUT_GRADIENT(test_operator, + paddle::framework::OpWithoutKernelTest, + paddle::framework::OpWithoutKernelCheckerMaker); TEST(OperatorBase, all) { + paddle::framework::InitDevices({"CPU"}); paddle::framework::proto::OpDesc op_desc; op_desc.set_type("test_operator"); BuildVar("input", {"IN1"}, op_desc.add_inputs()); @@ -80,13 +80,13 @@ TEST(OperatorBase, all) { attr->set_type(paddle::framework::proto::AttrType::FLOAT); attr->set_f(3.14); - paddle::platform::CPUDeviceContext device_context; + paddle::platform::CPUPlace cpu_place; paddle::framework::Scope scope; auto op = paddle::framework::OpRegistry::CreateOp(op_desc); scope.Var("OUT1"); ASSERT_EQ(paddle::framework::op_run_num, 0); - op->Run(scope, device_context); + op->Run(scope, cpu_place); ASSERT_EQ(paddle::framework::op_run_num, 1); } @@ -114,7 +114,7 @@ class OpWithKernelTest : public OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override {} - OpKernelType GetKernelType(const ExecutionContext& ctx) const override { + OpKernelType GetActualKernelType(const ExecutionContext& ctx) const override { return OpKernelType(proto::DataType::FP32, ctx.GetPlace()); } }; @@ -123,7 +123,6 @@ template class CPUKernelTest : public OpKernel { public: void Compute(const ExecutionContext& ctx) const { - std::cout << "this is cpu kernel" << std::endl; std::cout << ctx.op().DebugString() << std::endl; cpu_kernel_run_num++; ASSERT_EQ(ctx.op().Input("x"), "IN1"); @@ -195,6 +194,7 @@ REGISTER_OP_CPU_KERNEL(op_with_kernel, // test with single input TEST(OpKernel, all) { + paddle::framework::InitDevices({"CPU"}); paddle::framework::proto::OpDesc op_desc; op_desc.set_type("op_with_kernel"); BuildVar("x", {"IN1"}, op_desc.add_inputs()); @@ -205,12 +205,12 @@ TEST(OpKernel, all) { attr->set_type(paddle::framework::proto::AttrType::FLOAT); attr->set_f(3.14); - paddle::platform::CPUDeviceContext cpu_device_context; + paddle::platform::CPUPlace cpu_place; paddle::framework::Scope scope; auto op = paddle::framework::OpRegistry::CreateOp(op_desc); ASSERT_EQ(paddle::framework::cpu_kernel_run_num, 0); - op->Run(scope, cpu_device_context); + op->Run(scope, cpu_place); ASSERT_EQ(paddle::framework::cpu_kernel_run_num, 1); } @@ -224,7 +224,9 @@ REGISTER_OP_CPU_KERNEL(op_multi_inputs_with_kernel, TEST(OpKernel, multi_inputs) { using namespace paddle::framework; + paddle::framework::InitDevices({"CPU"}); proto::OpDesc op_desc; + op_desc.set_type("op_multi_inputs_with_kernel"); BuildVar("xs", {"x0", "x1", "x2"}, op_desc.add_inputs()); BuildVar("k", {"k0"}, op_desc.add_inputs()); @@ -235,7 +237,7 @@ TEST(OpKernel, multi_inputs) { attr->set_type(paddle::framework::proto::AttrType::FLOAT); attr->set_f(3.14); - paddle::platform::CPUDeviceContext cpu_device_context; + paddle::platform::CPUPlace cpu_place; paddle::framework::Scope scope; scope.Var("x0")->GetMutable(); scope.Var("x1")->GetMutable(); @@ -245,7 +247,7 @@ TEST(OpKernel, multi_inputs) { scope.Var("y1")->GetMutable(); auto op = paddle::framework::OpRegistry::CreateOp(op_desc); - op->Run(scope, cpu_device_context); + op->Run(scope, cpu_place); } class OperatorClone : public paddle::framework::OperatorBase { @@ -257,10 +259,11 @@ class OperatorClone : public paddle::framework::OperatorBase { const paddle::framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) {} void Run(const paddle::framework::Scope& scope, - const paddle::platform::DeviceContext& dev_ctx) const override {} + const paddle::platform::Place& place) const override {} }; TEST(Operator, Clone) { + paddle::framework::InitDevices({"CPU"}); OperatorClone a("ABC", paddle::framework::VariableNameMap{}, paddle::framework::VariableNameMap{}, paddle::framework::AttributeMap{}); diff --git a/paddle/framework/program_desc_test.cc b/paddle/framework/program_desc_test.cc index a49886f7ea56bc57459202dba65e3f76a902cd70..59947c9f2189348226b7ff6c2b9315196bbf55fa 100644 --- a/paddle/framework/program_desc_test.cc +++ b/paddle/framework/program_desc_test.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/program_desc.h" #include "gtest/gtest.h" diff --git a/paddle/framework/prune_test.cc b/paddle/framework/prune_test.cc index bdd57659432ea4f9bdd05425a802110b0c202fb8..d76c5abca94cb87220ce73537a8657c3ec695f4d 100644 --- a/paddle/framework/prune_test.cc +++ b/paddle/framework/prune_test.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/prune.h" diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc index 656736e23846c8de50553a608c54a0bdd3272cb1..4e80e3d974e2b646ad62d26991e4629f8c450578 100644 --- a/paddle/framework/scope.cc +++ b/paddle/framework/scope.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include // for unique_ptr #include // for call_once #include "glog/logging.h" +#include "paddle/framework/threadpool.h" #include "paddle/string/printf.h" namespace paddle { @@ -74,17 +75,9 @@ void Scope::DropKids() { kids_.clear(); } -std::vector Scope::GetAllNames(bool recursive) const { - std::vector known_vars(vars_.size()); - - if (recursive) { - for (auto& kid : kids_) { - auto kid_vars = kid->GetAllNames(); - for (auto& p : kid_vars) { - known_vars.emplace_back(p); - } - } - } +std::vector Scope::LocalVarNames() const { + std::vector known_vars; + known_vars.reserve(this->vars_.size()); for (auto& p : vars_) { known_vars.emplace_back(p.first); } @@ -95,7 +88,8 @@ void Scope::DeleteScope(Scope* scope) { auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope); this->kids_.erase(it); - delete scope; + // Make delete async. + Async([scope] { delete scope; }); } void Scope::Rename(const std::string& origin_name, diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h index 56e815db54b6385c4e4d87f456ed5d59113ca77b..10143326dfa201894c777b3e5e226d5ca5015eda 100644 --- a/paddle/framework/scope.h +++ b/paddle/framework/scope.h @@ -66,7 +66,7 @@ class Scope { void DropKids(); // enumerate all the variables current contains. - std::vector GetAllNames(bool recursive = false) const; + std::vector LocalVarNames() const; // Rename variable to a new name void Rename(const std::string& origin_name, diff --git a/paddle/framework/scope_test.cc b/paddle/framework/scope_test.cc index f738d5ba9ecda57ea25bb5f84057d1d0106eef66..0f5b86061dbdebde08badca7f984f4a2c8d7bc79 100644 --- a/paddle/framework/scope_test.cc +++ b/paddle/framework/scope_test.cc @@ -61,7 +61,7 @@ TEST(Scope, GetAllNames) { Variable* v = s.Var("a"); EXPECT_EQ(&s, s.FindScope(v)); - std::vector ans = s.GetAllNames(); + std::vector ans = s.LocalVarNames(); std::string str; for (auto& var : ans) { str += var; diff --git a/paddle/framework/selected_rows.cc b/paddle/framework/selected_rows.cc index c74459c9dd7006a24615b1d6df041583088fb25c..3b3e60177a495cc99f38ee8b82af41c4c76b8652 100644 --- a/paddle/framework/selected_rows.cc +++ b/paddle/framework/selected_rows.cc @@ -12,5 +12,58 @@ limitations under the License. */ #include "paddle/framework/selected_rows.h" namespace paddle { -namespace framework {} // namespace framework +namespace framework { +void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows, + const platform::DeviceContext& dev_ctx) { + { // the 1st field, uint32_t version + constexpr uint32_t version = 0; + os.write(reinterpret_cast(&version), sizeof(version)); + } + { + // the 2st field, rows information + auto& rows = selected_rows.rows(); + uint64_t size = rows.size(); + os.write(reinterpret_cast(&size), sizeof(size)); + for (uint64_t i = 0; i < size; ++i) { + os.write(reinterpret_cast(&rows[i]), sizeof(rows[i])); + } + } + { + // the 3st field, the height of SelectedRows + int64_t height = selected_rows.height(); + os.write(reinterpret_cast(&height), sizeof(height)); + } + // the 4st field, Tensor data + SerializeToStream(os, selected_rows.value(), dev_ctx); +} + +void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows, + const platform::DeviceContext& dev_ctx) { + { + // the 1st field, unit32_t version for SelectedRows + uint32_t version; + is.read(reinterpret_cast(&version), sizeof(version)); + PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); + } + { + // the 2st field, rows information + uint64_t size; + is.read(reinterpret_cast(&size), sizeof(size)); + auto& rows = *selected_rows->mutable_rows(); + rows.resize(size); + for (uint64_t i = 0; i < size; ++i) { + is.read(reinterpret_cast(&rows[i]), sizeof(int64_t)); + } + } + { + // the 3st field, the height of the SelectedRows + int64_t height; + is.read(reinterpret_cast(&height), sizeof(int64_t)); + selected_rows->set_height(height); + } + // the 4st field, tensor which contains the data + DeserializeFromStream(is, selected_rows->mutable_value(), dev_ctx); +} + +} // namespace framework } // namespace paddle diff --git a/paddle/framework/selected_rows.h b/paddle/framework/selected_rows.h index 0332b91323e3a4b4b80e02302ad3dcafe0986cde..30d3dfc1e89f073a8180ceacf77619b36f7079a9 100644 --- a/paddle/framework/selected_rows.h +++ b/paddle/framework/selected_rows.h @@ -59,5 +59,15 @@ class SelectedRows { int64_t height_; }; +/* + * Serialize/Desiralize SelectedRows to std::ostream + * You can pass ofstream or ostringstream to serilize to file + * or to a in memory string. GPU tensor will be copied to CPU. + */ +void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows, + const platform::DeviceContext& dev_ctx); +void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows, + const platform::DeviceContext& dev_ctx); + } // namespace framework } // namespace paddle diff --git a/paddle/framework/selected_rows_test.cc b/paddle/framework/selected_rows_test.cc index 4ee13a65d72e44693573397bb686b355effb2227..8ff3fb6a97199a2798ab29c56957a0f77fa26628 100644 --- a/paddle/framework/selected_rows_test.cc +++ b/paddle/framework/selected_rows_test.cc @@ -43,5 +43,21 @@ TEST_F(SelectedRowsTester, complete_dims) { ASSERT_EQ(selected_rows_->GetCompleteDims(), make_ddim({10, 100})); } +TEST_F(SelectedRowsTester, SerializeAndDeseralize) { + SelectedRows dst_tensor; + platform::CPUDeviceContext cpu_ctx(place_); + std::ostringstream oss; + + SerializeToStream(oss, *selected_rows_, cpu_ctx); + + std::istringstream iss(oss.str()); + DeserializeFromStream(iss, &dst_tensor, cpu_ctx); + + ASSERT_EQ(selected_rows_->rows(), dst_tensor.rows()); + ASSERT_EQ(selected_rows_->height(), dst_tensor.height()); + ASSERT_EQ(selected_rows_->value().dims(), dst_tensor.value().dims()); + ASSERT_EQ(selected_rows_->GetCompleteDims(), dst_tensor.GetCompleteDims()); +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/shape_inference.cc b/paddle/framework/shape_inference.cc index 86dc01665bda5e7f988e60780c0600b049d737ef..e53cc0cdabc623ae358f1a3e21823a2f38ec3c62 100644 --- a/paddle/framework/shape_inference.cc +++ b/paddle/framework/shape_inference.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/shape_inference.h" #include "grad_op_desc_maker.h" #include "paddle/framework/operator.h" diff --git a/paddle/framework/tensor.cc b/paddle/framework/tensor.cc index ea7b2a1f7b17d9abc2c2e14de5ecd1cf4a1a5027..f922e606249849e621e679f71d6dbe0f007c8464 100644 --- a/paddle/framework/tensor.cc +++ b/paddle/framework/tensor.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/tensor.h" diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 6a0c5133c9a6bb326ca51755242e75b6eb9e5474..02b125cbbee641c01ad34b984c634b6d95d65a8e 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -20,12 +20,12 @@ limitations under the License. */ #include #include +#include "paddle/framework/data_layout.h" #include "paddle/framework/ddim.h" #include "paddle/memory/memory.h" #include "paddle/platform/device_context.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" -#include "unsupported/Eigen/CXX11/Tensor" namespace paddle { @@ -55,6 +55,8 @@ class Tensor { template inline const T* data() const; + inline void switch_place(platform::Place new_place); + /** * @brief Return a pointer to mutable memory block. * @note If not exist, then allocation. @@ -115,6 +117,10 @@ class Tensor { inline void check_memory_size() const; + inline DataLayout layout() const { return layout_; } + + inline void set_layout(const DataLayout layout) { layout_ = layout; } + private: friend class LoDTensor; @@ -173,6 +179,19 @@ class Tensor { DDim dims_; + /** + * @brief the layout of memory block, default is NHWC. + * + * @note the memory allocation order, describe how weight/data is stored + * For example, in 4-D Tensor(rank=4), there are three commonly + * used layout. They are + * NCHW, NHWC, CHWN. + * N,C,H,W for respectively the batch size, the number of + * feature maps, the height. + */ + + DataLayout layout_ = DataLayout::kNHWC; + /** * @brief A PlaceHolder may be shared by more than one tensor. * @@ -183,6 +202,15 @@ class Tensor { size_t offset_; }; +inline void Tensor::switch_place(platform::Place new_place) { + if (holder_->place() == new_place) { + return; + } + + // TODO(tonyyang-svail): do memcpy here. + PADDLE_THROW("Not Implemented"); +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/tensor.md b/paddle/framework/tensor.md index 7a80816d8e4ffa3a9462f3d9b87eff0f048466aa..0a27ac9bb6b03649d42e12100fda9e80a56e7f56 100644 --- a/paddle/framework/tensor.md +++ b/paddle/framework/tensor.md @@ -71,7 +71,7 @@ private: ``` ```c++ -typedef boost::variant Place; +typedef boost::variant Place; typedef boost::variant, Dim<2>, Dim<3>, Dim<4>, Dim<5>, Dim<6>, Dim<7>, Dim<8>, Dim<9>> DDimVar; typedef boost::variant< diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index aba1f9f09329f890ef190f8820b958c56f017e89..6c6f298edc187a87677089e54c4c9046821282df 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -125,11 +125,11 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type) { boost::get(place), size, type)); } else if (platform::is_gpu_place(place)) { #ifndef PADDLE_WITH_CUDA - PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); + PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); } #else - holder_.reset(new PlaceholderImpl( - boost::get(place), size, type)); + holder_.reset(new PlaceholderImpl( + boost::get(place), size, type)); } #endif offset_ = 0; @@ -165,6 +165,7 @@ inline Tensor Tensor::Slice(int begin_idx, int end_idx) const { size_t base = numel() / dims_[0]; Tensor dst; dst.holder_ = holder_; + dst.set_layout(layout_); DDim dst_dims = dims_; dst_dims[0] = end_idx - begin_idx; dst.Resize(dst_dims); diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index ceca64365a1a628642eb374a3e3bbdff490c955a..a1b4a03289eca4c8b9d8c23ede4221853cb31f79 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -15,12 +15,13 @@ #include #include +namespace framework = paddle::framework; +namespace platform = paddle::platform; + TEST(Tensor, Dims) { - using namespace paddle::framework; - using namespace paddle::platform; - Tensor tt; + framework::Tensor tt; tt.Resize({2, 3, 4}); - DDim dims = tt.dims(); + framework::DDim dims = tt.dims(); ASSERT_EQ(arity(dims), 3); for (int i = 0; i < 3; ++i) { EXPECT_EQ(i + 2, dims[i]); @@ -28,12 +29,12 @@ TEST(Tensor, Dims) { } TEST(Tensor, DataAssert) { - paddle::framework::Tensor src_tensor; + framework::Tensor src_tensor; bool caught = false; try { src_tensor.data(); - } catch (paddle::platform::EnforceNotMet err) { + } catch (platform::EnforceNotMet err) { caught = true; std::string msg = "holder_ should not be null\nTensor holds no memory. Call " @@ -50,61 +51,65 @@ TEST(Tensor, DataAssert) { because Memory::Alloc() and Memory::Free() have not been ready. */ TEST(Tensor, MutableData) { - using namespace paddle::framework; - using namespace paddle::platform; { - Tensor src_tensor; + framework::Tensor src_tensor; float* p1 = nullptr; float* p2 = nullptr; // initialization - p1 = src_tensor.mutable_data(make_ddim({1, 2, 3}), CPUPlace()); + p1 = src_tensor.mutable_data(framework::make_ddim({1, 2, 3}), + platform::CPUPlace()); EXPECT_NE(p1, nullptr); // set src_tensor a new dim with large size // momery is supposed to be re-allocated - p2 = src_tensor.mutable_data(make_ddim({3, 4}), CPUPlace()); + p2 = src_tensor.mutable_data(framework::make_ddim({3, 4}), + platform::CPUPlace()); EXPECT_NE(p2, nullptr); EXPECT_NE(p1, p2); // set src_tensor a new dim with same size // momery block is supposed to be unchanged - p1 = src_tensor.mutable_data(make_ddim({2, 2, 3}), CPUPlace()); + p1 = src_tensor.mutable_data(framework::make_ddim({2, 2, 3}), + platform::CPUPlace()); EXPECT_EQ(p1, p2); // set src_tensor a new dim with smaller size // momery block is supposed to be unchanged - p2 = src_tensor.mutable_data(make_ddim({2, 2}), CPUPlace()); + p2 = src_tensor.mutable_data(framework::make_ddim({2, 2}), + platform::CPUPlace()); EXPECT_EQ(p1, p2); } #ifdef PADDLE_WITH_CUDA { - Tensor src_tensor; + framework::Tensor src_tensor; float* p1 = nullptr; float* p2 = nullptr; // initialization - p1 = src_tensor.mutable_data(make_ddim({1, 2, 3}), GPUPlace()); + p1 = src_tensor.mutable_data(framework::make_ddim({1, 2, 3}), + platform::CUDAPlace()); EXPECT_NE(p1, nullptr); // set src_tensor a new dim with large size // momery is supposed to be re-allocated - p2 = src_tensor.mutable_data(make_ddim({3, 4}), GPUPlace()); + p2 = src_tensor.mutable_data(framework::make_ddim({3, 4}), + platform::CUDAPlace()); EXPECT_NE(p2, nullptr); EXPECT_NE(p1, p2); // set src_tensor a new dim with same size // momery block is supposed to be unchanged - p1 = src_tensor.mutable_data(make_ddim({2, 2, 3}), GPUPlace()); + p1 = src_tensor.mutable_data(framework::make_ddim({2, 2, 3}), + platform::CUDAPlace()); EXPECT_EQ(p1, p2); // set src_tensor a new dim with smaller size // momery block is supposed to be unchanged - p2 = src_tensor.mutable_data(make_ddim({2, 2}), GPUPlace()); + p2 = src_tensor.mutable_data(framework::make_ddim({2, 2}), + platform::CUDAPlace()); EXPECT_EQ(p1, p2); } #endif } TEST(Tensor, ShareDataWith) { - using namespace paddle::framework; - using namespace paddle::platform; { - Tensor src_tensor; - Tensor dst_tensor; + framework::Tensor src_tensor; + framework::Tensor dst_tensor; // Try to share data form uninitialized tensor bool caught = false; try { @@ -121,16 +126,18 @@ TEST(Tensor, ShareDataWith) { } ASSERT_TRUE(caught); - src_tensor.mutable_data(make_ddim({2, 3, 4}), CPUPlace()); + src_tensor.mutable_data(framework::make_ddim({2, 3, 4}), + platform::CPUPlace()); dst_tensor.ShareDataWith(src_tensor); ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } #ifdef PADDLE_WITH_CUDA { - Tensor src_tensor; - Tensor dst_tensor; - src_tensor.mutable_data(make_ddim({2, 3, 4}), GPUPlace()); + framework::Tensor src_tensor; + framework::Tensor dst_tensor; + src_tensor.mutable_data(framework::make_ddim({2, 3, 4}), + platform::CUDAPlace()); dst_tensor.ShareDataWith(src_tensor); ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } @@ -138,13 +145,12 @@ TEST(Tensor, ShareDataWith) { } TEST(Tensor, Slice) { - using namespace paddle::framework; - using namespace paddle::platform; { - Tensor src_tensor; - src_tensor.mutable_data(make_ddim({5, 3, 4}), CPUPlace()); - Tensor slice_tensor = src_tensor.Slice(1, 3); - DDim slice_dims = slice_tensor.dims(); + framework::Tensor src_tensor; + src_tensor.mutable_data(framework::make_ddim({5, 3, 4}), + platform::CPUPlace()); + framework::Tensor slice_tensor = src_tensor.Slice(1, 3); + framework::DDim slice_dims = slice_tensor.dims(); ASSERT_EQ(arity(slice_dims), 3); EXPECT_EQ(slice_dims[0], 2); EXPECT_EQ(slice_dims[1], 3); @@ -153,11 +159,12 @@ TEST(Tensor, Slice) { uintptr_t src_data_address = reinterpret_cast(src_tensor.data()); uintptr_t src_mutable_data_address = reinterpret_cast( - src_tensor.mutable_data(src_tensor.dims(), CPUPlace())); + src_tensor.mutable_data(src_tensor.dims(), platform::CPUPlace())); uintptr_t slice_data_address = reinterpret_cast(slice_tensor.data()); - uintptr_t slice_mutable_data_address = reinterpret_cast( - slice_tensor.mutable_data(slice_tensor.dims(), CPUPlace())); + uintptr_t slice_mutable_data_address = + reinterpret_cast(slice_tensor.mutable_data( + slice_tensor.dims(), platform::CPUPlace())); EXPECT_EQ(src_data_address, src_mutable_data_address); EXPECT_EQ(slice_data_address, slice_mutable_data_address); EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address); @@ -165,22 +172,25 @@ TEST(Tensor, Slice) { #ifdef PADDLE_WITH_CUDA { - Tensor src_tensor; - src_tensor.mutable_data(make_ddim({6, 9}), GPUPlace()); - Tensor slice_tensor = src_tensor.Slice(2, 6); - DDim slice_dims = slice_tensor.dims(); + framework::Tensor src_tensor; + src_tensor.mutable_data(framework::make_ddim({6, 9}), + platform::CUDAPlace()); + framework::Tensor slice_tensor = src_tensor.Slice(2, 6); + framework::DDim slice_dims = slice_tensor.dims(); ASSERT_EQ(arity(slice_dims), 2); EXPECT_EQ(slice_dims[0], 4); EXPECT_EQ(slice_dims[1], 9); uintptr_t src_data_address = reinterpret_cast(src_tensor.data()); - uintptr_t src_mutable_data_address = reinterpret_cast( - src_tensor.mutable_data(src_tensor.dims(), GPUPlace())); + uintptr_t src_mutable_data_address = + reinterpret_cast(src_tensor.mutable_data( + src_tensor.dims(), platform::CUDAPlace())); uintptr_t slice_data_address = reinterpret_cast(slice_tensor.data()); - uintptr_t slice_mutable_data_address = reinterpret_cast( - slice_tensor.mutable_data(slice_tensor.dims(), GPUPlace())); + uintptr_t slice_mutable_data_address = + reinterpret_cast(slice_tensor.mutable_data( + slice_tensor.dims(), platform::CUDAPlace())); EXPECT_EQ(src_data_address, src_mutable_data_address); EXPECT_EQ(slice_data_address, slice_mutable_data_address); EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address); @@ -189,14 +199,19 @@ TEST(Tensor, Slice) { } TEST(Tensor, ReshapeToMatrix) { - using namespace paddle::framework; - using namespace paddle::platform; - Tensor src; - int* src_ptr = src.mutable_data({2, 3, 4, 9}, CPUPlace()); + framework::Tensor src; + int* src_ptr = src.mutable_data({2, 3, 4, 9}, platform::CPUPlace()); for (int i = 0; i < 2 * 3 * 4 * 9; ++i) { src_ptr[i] = i; } - Tensor res = ReshapeToMatrix(src, 2); + framework::Tensor res = framework::ReshapeToMatrix(src, 2); ASSERT_EQ(res.dims()[0], 2 * 3); ASSERT_EQ(res.dims()[1], 4 * 9); } + +TEST(Tensor, Layout) { + framework::Tensor src; + ASSERT_EQ(src.layout(), framework::DataLayout::kNHWC); + src.set_layout(framework::DataLayout::kAnyLayout); + ASSERT_EQ(src.layout(), framework::DataLayout::kAnyLayout); +} diff --git a/paddle/framework/tensor_util.cc b/paddle/framework/tensor_util.cc new file mode 100644 index 0000000000000000000000000000000000000000..7efc649d0bcda67c663d148e83bcbb6789b0f371 --- /dev/null +++ b/paddle/framework/tensor_util.cc @@ -0,0 +1,119 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/tensor_util.h" + +namespace paddle { +namespace framework { +template +struct AnyDTypeVisitor { + Predicate predicate_; + const Tensor& tensor_; + const DevCtx& ctx_; + Tensor* out_; + + AnyDTypeVisitor(Predicate predicate, const Tensor& tensor, const DevCtx& ctx, + Tensor* out) + : predicate_(predicate), tensor_(tensor), ctx_(ctx), out_(out) {} + + template + void operator()() const { + auto t = EigenVector::Flatten(tensor_); + auto o = EigenScalar::From(*out_); + // return any of predicate_(t) is true. + o.device(*ctx_.eigen_device()) = predicate_(t).any(); + } +}; + +template +inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor, + const DevCtx& ctx, framework::Tensor* out) { + VisitDataType(ToDataType(tensor.type()), AnyDTypeVisitor( + predicate, tensor, ctx, out)); +} + +template +struct AnyVisitor : public boost::static_visitor { + const framework::Tensor& tensor_; + Predicate predicate_; + + AnyVisitor(const framework::Tensor& tensor, Predicate predicate) + : tensor_(tensor), predicate_(std::move(predicate)) {} + + template + bool operator()(const Place& place) const { + framework::Tensor out; + out.Resize({1}); + out.mutable_data(place); + auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place); + AnyImpl(predicate_, tensor_, *ctx, &out); + return this->GetResult(out, place); + } + + bool GetResult(const framework::Tensor& out, + const platform::CUDAPlace& gpu) const { + platform::CPUPlace cpu; + framework::Tensor tmp; + tmp.Resize({1}); + tmp.mutable_data(cpu); + auto gpuctx = platform::DeviceContextPool::Instance().Get(gpu); + gpuctx->Wait(); + CopyFrom(out, cpu, *gpuctx, &tmp); + gpuctx->Wait(); + return GetResult(tmp, cpu); + } + + bool GetResult(const framework::Tensor& out, + const platform::CPUPlace& cpu) const { + return *out.data(); + } +}; + +template +inline bool Any(const framework::Tensor& tensor, Predicate predicate) { + AnyVisitor visitor(tensor, predicate); + auto place = tensor.place(); + return platform::VisitPlace(place, visitor); +} + +struct HasNANPredicate { + template + auto operator()(const T& eigen_vec) const + -> decltype(std::declval().isnan()) { + // Cast eigen_vector to vector of bool. true if is inf. + return eigen_vec.isnan(); + } +}; + +bool HasNAN(const framework::Tensor& tensor) { + HasNANPredicate predicate; + return Any(tensor, predicate); +} + +struct HasInfPredicate { + template + auto operator()(const T& eigen_vec) const + -> decltype(std::declval().isinf()) { + // Cast eigen_vector to vector of bool. true if is inf. + return eigen_vec.isinf(); + } +}; + +bool HasInf(const framework::Tensor& tensor) { + HasInfPredicate predicate; + return Any(tensor, predicate); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/tensor_util.cu b/paddle/framework/tensor_util.cu new file mode 120000 index 0000000000000000000000000000000000000000..b00e6e59d93328bf3142597ea4de0dc225501e56 --- /dev/null +++ b/paddle/framework/tensor_util.cu @@ -0,0 +1 @@ +./tensor_util.cc \ No newline at end of file diff --git a/paddle/framework/tensor_util.h b/paddle/framework/tensor_util.h index 4e34b90d57eed8fea84b83045df61a98483c8849..5ac13cba4dae9c058e2d96da24dab01f44ece772 100644 --- a/paddle/framework/tensor_util.h +++ b/paddle/framework/tensor_util.h @@ -1,19 +1,23 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once +#include "paddle/framework/data_type.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/framework.pb.h" #include "paddle/framework/tensor.h" +#include "paddle/platform/device_context.h" namespace paddle { namespace framework { @@ -33,6 +37,7 @@ inline void CopyFrom(const Tensor& src, const platform::Place& dst_place, src.check_memory_size(); dst->Resize(src.dims()); + dst->set_layout(src.layout()); auto src_place = src.place(); auto src_ptr = src.data(); @@ -47,11 +52,11 @@ inline void CopyFrom(const Tensor& src, const platform::Place& dst_place, #ifdef PADDLE_WITH_CUDA else if (platform::is_gpu_place(src_place) && // NOLINT platform::is_cpu_place(dst_place)) { - auto src_gpu_place = boost::get(src_place); + auto src_gpu_place = boost::get(src_place); auto dst_cpu_place = boost::get(dst_place); auto ctx_place = ctx.GetPlace(); PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); - auto ctx_gpu_place = boost::get(ctx_place); + auto ctx_gpu_place = boost::get(ctx_place); PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place); memory::Copy( dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, @@ -59,21 +64,21 @@ inline void CopyFrom(const Tensor& src, const platform::Place& dst_place, } else if (platform::is_cpu_place(src_place) && platform::is_gpu_place(dst_place)) { auto src_cpu_place = boost::get(src_place); - auto dst_gpu_place = boost::get(dst_place); + auto dst_gpu_place = boost::get(dst_place); auto ctx_place = ctx.GetPlace(); PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); - auto ctx_gpu_place = boost::get(ctx_place); + auto ctx_gpu_place = boost::get(ctx_place); PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place); memory::Copy( dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, reinterpret_cast(ctx).stream()); } else if (platform::is_gpu_place(src_place) && platform::is_gpu_place(dst_place)) { - auto src_gpu_place = boost::get(src_place); - auto dst_gpu_place = boost::get(dst_place); + auto src_gpu_place = boost::get(src_place); + auto dst_gpu_place = boost::get(dst_place); auto ctx_place = ctx.GetPlace(); PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); - auto ctx_gpu_place = boost::get(ctx_place); + auto ctx_gpu_place = boost::get(ctx_place); PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place); memory::Copy( dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, @@ -82,6 +87,29 @@ inline void CopyFrom(const Tensor& src, const platform::Place& dst_place, #endif } +/** + * @brief CopyFrom support CPU <-> CPU + */ +inline void CopyFrom(const Tensor& src, const platform::Place& dst_place, + Tensor* dst) { + src.check_memory_size(); + dst->Resize(src.dims()); + dst->set_layout(src.layout()); + + auto src_place = src.place(); + auto src_ptr = src.data(); + + auto dst_ptr = dst->mutable_data(dst_place, src.type()); + + auto size = src.numel() * SizeOfType(src.type()); + + PADDLE_ENFORCE(platform::is_cpu_place(src_place) && + platform::is_cpu_place(dst_place)); + + memory::Copy(boost::get(dst_place), dst_ptr, + boost::get(src_place), src_ptr, size); +} + /** * @brief Copy the content of an external vector to a tensor. * @@ -108,13 +136,28 @@ inline void CopyFromVector(const std::vector& src, #ifdef PADDLE_WITH_CUDA else if (platform::is_gpu_place(dst_place)) { // NOLINT memory::Copy( - boost::get(dst_place), dst_ptr, src_place, src_ptr, + boost::get(dst_place), dst_ptr, src_place, src_ptr, size, reinterpret_cast(ctx).stream()); } #endif } +/** + * @brief CopyFromVector CPU vector -> CPU Tensor + */ +template +inline void CopyFromVector(const std::vector& src, Tensor* dst) { + platform::CPUPlace dst_place = platform::CPUPlace(); + auto src_ptr = static_cast(src.data()); + platform::CPUPlace src_place; + dst->Resize({static_cast(src.size())}); + auto dst_ptr = static_cast(dst->mutable_data(dst_place)); + auto size = src.size() * sizeof(T); + + memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size); +} + /** * @brief Copy the content of a tensor to a vector * @@ -141,12 +184,151 @@ inline void CopyToVector(const Tensor& src, const platform::DeviceContext& ctx, #ifdef PADDLE_WITH_CUDA else if (platform::is_gpu_place(src.place())) { // NOLINT memory::Copy( - dst_place, dst_ptr, boost::get(src.place()), + dst_place, dst_ptr, boost::get(src.place()), src_ptr, size, reinterpret_cast(ctx).stream()); } #endif } +/** + * @brief CopyToVector CPUTensor <-> CPU Vector + */ +template +inline void CopyToVector(const Tensor& src, std::vector* dst) { + auto src_ptr = static_cast(src.data()); + auto size = src.numel() * sizeof(T); + + platform::CPUPlace dst_place; + dst->resize(src.numel()); + auto dst_ptr = static_cast(dst->data()); + + PADDLE_ENFORCE(platform::is_cpu_place(src.place())); + + memory::Copy(dst_place, dst_ptr, boost::get(src.place()), + src_ptr, size); +} + +// Returns true if a tensor contains NAN, i.e., Not A Number. +bool HasNAN(const framework::Tensor& tensor); + +// Returns true if a tensor contains Inf, i.e., Infinity. +bool HasInf(const framework::Tensor& tensor); + +inline void SerializeToStream(std::ostream& os, const Tensor& tensor, + const platform::DeviceContext& dev_ctx) { + // TODO(typhoonzero): serialize to ostream + { // the 1st field, uint32_t version + constexpr uint32_t version = 0; + os.write(reinterpret_cast(&version), sizeof(version)); + } + { // the 2nd field, tensor description + // int32_t size + // void* protobuf message + proto::TensorDesc desc; + desc.set_data_type(framework::ToDataType(tensor.type())); + auto dims = framework::vectorize(tensor.dims()); + auto* pb_dims = desc.mutable_dims(); + pb_dims->Resize(static_cast(dims.size()), 0); + std::copy(dims.begin(), dims.end(), pb_dims->begin()); + int32_t size = desc.ByteSize(); + os.write(reinterpret_cast(&size), sizeof(size)); + auto out = desc.SerializeAsString(); + os.write(out.data(), size); + } + { // the 3rd field, tensor data + uint64_t size = tensor.memory_size(); + auto* data_ptr = tensor.data(); + PADDLE_ENFORCE(size < std::numeric_limits::max(), + "Index overflow when writing tensor"); + if (platform::is_gpu_place(tensor.place())) { +#ifdef PADDLE_WITH_CUDA + constexpr size_t kBufSize = 1024 * 1024 * 64; // 64MB + std::unique_ptr buf(new char[kBufSize]); + auto& gpu_dev_ctx = + static_cast(dev_ctx); + platform::CPUPlace cpu; + uintptr_t data = reinterpret_cast(data_ptr); + while (size != 0) { + size_t size_to_write = std::min(kBufSize, static_cast(size)); + memory::Copy(cpu, buf.get(), + boost::get(tensor.place()), + reinterpret_cast(data), size_to_write, + gpu_dev_ctx.stream()); + gpu_dev_ctx.Wait(); + os.write(buf.get(), size_to_write); + data += size_to_write; + size -= size_to_write; + } +#else + PADDLE_THROW("Unexpected branch"); +#endif + } else { + os.write(static_cast(data_ptr), + static_cast(size)); + } + } +} + +struct DeserializedDataFunctor { + DeserializedDataFunctor(void** buf, Tensor* tensor, + const platform::Place& place) + : buf_(buf), tensor_(tensor), place_(place) {} + + template + void operator()() { + *buf_ = tensor_->mutable_data(place_); + } + + void** buf_; + Tensor* tensor_; + platform::Place place_; +}; + +inline void DeserializeFromStream(std::istream& is, Tensor* tensor, + const platform::DeviceContext& dev_ctx) { + uint32_t version; + is.read(reinterpret_cast(&version), sizeof(version)); + PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); + proto::TensorDesc desc; + { // int32_t size + // proto buffer + int32_t size; + is.read(reinterpret_cast(&size), sizeof(size)); + std::unique_ptr buf(new char[size]); + is.read(reinterpret_cast(buf.get()), size); + PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size), + "Cannot parse tensor desc"); + } + { // read tensor + std::vector dims; + dims.reserve(static_cast(desc.dims().size())); + std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims)); + tensor->Resize(framework::make_ddim(dims)); + void* buf; + auto ctx = platform::CPUDeviceContext(); + if (platform::is_gpu_place(dev_ctx.GetPlace())) { +#ifdef PADDLE_WITH_CUDA + Tensor cpu_tensor; + cpu_tensor.Resize(framework::make_ddim(dims)); + framework::VisitDataType( + desc.data_type(), + DeserializedDataFunctor(&buf, &cpu_tensor, ctx.GetPlace())); + is.read(static_cast(buf), cpu_tensor.memory_size()); + auto cpu_place = new platform::CPUPlace(); + framework::CopyFrom(cpu_tensor, *cpu_place, dev_ctx, tensor); + delete cpu_place; +#else + PADDLE_THROW("Unexpected branch"); +#endif + } else { + framework::VisitDataType( + desc.data_type(), + DeserializedDataFunctor(&buf, tensor, ctx.GetPlace())); + is.read(static_cast(buf), tensor->memory_size()); + } + } +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/tensor_util_test.cc b/paddle/framework/tensor_util_test.cc index 03a70de182d0eb499a81413d38229c81c4378b91..15cd2bd09c4a34bc7a5bb8645762a3e0aaefd713 100644 --- a/paddle/framework/tensor_util_test.cc +++ b/paddle/framework/tensor_util_test.cc @@ -13,10 +13,12 @@ #include "paddle/framework/tensor_util.h" #include +#include #include namespace paddle { namespace framework { + TEST(CopyFrom, Tensor) { Tensor src_tensor; Tensor dst_tensor; @@ -27,9 +29,10 @@ TEST(CopyFrom, Tensor) { int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; memcpy(src_ptr, arr, 9 * sizeof(int)); + src_tensor.set_layout(DataLayout::kAnyLayout); auto cpu_place = new platform::CPUPlace(); - CopyFrom(src_tensor, *cpu_place, cpu_ctx, &dst_tensor); + CopyFrom(src_tensor, *cpu_place, &dst_tensor); const int* dst_ptr = dst_tensor.data(); ASSERT_NE(src_ptr, dst_ptr); @@ -37,14 +40,18 @@ TEST(CopyFrom, Tensor) { EXPECT_EQ(src_ptr[i], dst_ptr[i]); } + EXPECT_TRUE(dst_tensor.layout() == src_tensor.layout()); + Tensor slice_tensor = src_tensor.Slice(1, 2); - CopyFrom(slice_tensor, *cpu_place, cpu_ctx, &dst_tensor); + CopyFrom(slice_tensor, *cpu_place, &dst_tensor); const int* slice_ptr = slice_tensor.data(); dst_ptr = dst_tensor.data(); ASSERT_NE(dst_ptr, slice_ptr); for (size_t i = 0; i < 3; ++i) { EXPECT_EQ(dst_ptr[i], slice_ptr[i]); } + EXPECT_TRUE(dst_tensor.layout() == src_tensor.layout()); + #ifdef PADDLE_WITH_CUDA { Tensor src_tensor; @@ -58,7 +65,7 @@ TEST(CopyFrom, Tensor) { memcpy(src_ptr, arr, 9 * sizeof(int)); // CPU Tensor to GPU Tensor - auto gpu_place = new platform::GPUPlace(0); + auto gpu_place = new platform::CUDAPlace(0); platform::CUDADeviceContext gpu_ctx(*gpu_place); CopyFrom(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor); @@ -90,6 +97,8 @@ TEST(CopyFrom, Tensor) { for (size_t i = 0; i < 3; ++i) { EXPECT_EQ(dst_ptr[i], slice_ptr[i]); } + + EXPECT_TRUE(dst_tensor.layout() == src_tensor.layout()); } #endif } @@ -104,8 +113,7 @@ TEST(CopyFromVector, Tensor) { // Copy to CPU Tensor cpu_tensor.Resize(make_ddim({3, 3})); auto cpu_place = new paddle::platform::CPUPlace(); - CPUDeviceContext cpu_ctx(*cpu_place); - CopyFromVector(src_vec, cpu_ctx, &cpu_tensor); + CopyFromVector(src_vec, &cpu_tensor); // Compare Tensors const int* cpu_ptr = cpu_tensor.data(); @@ -117,7 +125,7 @@ TEST(CopyFromVector, Tensor) { src_vec.erase(src_vec.begin(), src_vec.begin() + 5); cpu_tensor.Resize(make_ddim({2, 2})); - CopyFromVector(src_vec, cpu_ctx, &cpu_tensor); + CopyFromVector(src_vec, &cpu_tensor); cpu_ptr = cpu_tensor.data(); src_ptr = src_vec.data(); ASSERT_NE(src_ptr, cpu_ptr); @@ -143,7 +151,7 @@ TEST(CopyFromVector, Tensor) { // Copy to GPUTensor gpu_tensor.Resize(make_ddim({3, 3})); - auto gpu_place = new paddle::platform::GPUPlace(); + auto gpu_place = new paddle::platform::CUDAPlace(); CUDADeviceContext gpu_ctx(*gpu_place); CopyFromVector(src_vec, gpu_ctx, &gpu_tensor); // Copy from GPU to CPU tensor for comparison @@ -198,9 +206,8 @@ TEST(CopyToVector, Tensor) { } CPUPlace place; - CPUDeviceContext cpu_ctx(place); std::vector dst; - CopyToVector(src, cpu_ctx, &dst); + CopyToVector(src, &dst); for (int i = 0; i < 3 * 3; ++i) { EXPECT_EQ(src_ptr[i], dst[i]); @@ -210,7 +217,7 @@ TEST(CopyToVector, Tensor) { { std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; Tensor gpu_tensor; - GPUPlace place; + CUDAPlace place; CUDADeviceContext gpu_ctx(place); CopyFromVector(src_vec, gpu_ctx, &gpu_tensor); @@ -224,5 +231,78 @@ TEST(CopyToVector, Tensor) { #endif } +TEST(HasNAN, CPU) { + using namespace paddle::framework; + using namespace paddle::platform; + Tensor src; + float* buf = src.mutable_data({3}, CPUPlace()); + buf[0] = 0.0; + buf[1] = NAN; + buf[2] = 0.0; + + ASSERT_TRUE(HasNAN(src)); +} + +TEST(HasInf, CPU) { + using namespace paddle::framework; + using namespace paddle::platform; + Tensor src; + double* buf = src.mutable_data({3}, CPUPlace()); + buf[0] = 1.0; + buf[1] = INFINITY; + buf[2] = 0.0; + ASSERT_TRUE(HasInf(src)); +} + +TEST(Tensor, SerializeAndDeserialize) { + framework::Tensor src_tensor; + int array[6] = {1, 2, 3, 4, 5, 6}; + src_tensor.Resize({2, 3}); + int* src_ptr = src_tensor.mutable_data(platform::CPUPlace()); + for (int i = 0; i < 6; ++i) { + src_ptr[i] = array[i]; + } + { + framework::Tensor dst_tensor; + auto place = new platform::CPUPlace(); + platform::CPUDeviceContext cpu_ctx(*place); + std::ostringstream oss; + SerializeToStream(oss, src_tensor, cpu_ctx); + + std::istringstream iss(oss.str()); + DeserializeFromStream(iss, &dst_tensor, cpu_ctx); + int* dst_ptr = dst_tensor.mutable_data(platform::CPUPlace()); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(dst_ptr[i], array[i]); + } + ASSERT_EQ(dst_tensor.dims(), src_tensor.dims()); + delete place; + } +#ifdef PADDLE_WITH_CUDA + { + Tensor gpu_tensor; + gpu_tensor.Resize({2, 3}); + Tensor dst_tensor; + + auto gpu_place = new platform::CUDAPlace(); + platform::CUDADeviceContext gpu_ctx(*gpu_place); + + CopyFrom(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor); + + std::ostringstream oss; + SerializeToStream(oss, gpu_tensor, gpu_ctx); + + std::istringstream iss(oss.str()); + DeserializeFromStream(iss, &dst_tensor, gpu_ctx); + + int* dst_ptr = dst_tensor.mutable_data(platform::CPUPlace()); + for (int i = 0; i < 6; ++i) { + ASSERT_EQ(dst_ptr[i], array[i]); + } + delete gpu_place; + } +#endif +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/tensor_util_test.cu b/paddle/framework/tensor_util_test.cu new file mode 100644 index 0000000000000000000000000000000000000000..ebd35fdf6c2a1388fec23057070f723c8ef9da9c --- /dev/null +++ b/paddle/framework/tensor_util_test.cu @@ -0,0 +1,57 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "gtest/gtest.h" +#include "paddle/framework/tensor_util.h" +#include "paddle/platform/device_context.h" +#include "paddle/platform/place.h" + +namespace paddle { +namespace framework { + +static __global__ void FillNAN(float* buf) { + buf[0] = 0.0; + buf[1] = 0.1; + buf[2] = NAN; +} +static __global__ void FillInf(float* buf) { + buf[0] = 0.0; + buf[1] = INFINITY; + buf[2] = 0.5; +} + +TEST(HasNAN, GPU) { + Tensor tensor; + platform::CUDAPlace gpu(0); + auto& pool = platform::DeviceContextPool::Instance(); + auto* cuda_ctx = pool.GetByPlace(gpu); + float* buf = tensor.mutable_data({3}, gpu); + FillNAN<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + ASSERT_TRUE(HasNAN(tensor)); +} + +TEST(HasInf, GPU) { + Tensor tensor; + platform::CUDAPlace gpu(0); + auto& pool = platform::DeviceContextPool::Instance(); + auto* cuda_ctx = pool.GetByPlace(gpu); + float* buf = tensor.mutable_data({3}, gpu); + FillInf<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + ASSERT_TRUE(HasInf(tensor)); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/threadpool.cc b/paddle/framework/threadpool.cc new file mode 100644 index 0000000000000000000000000000000000000000..109a7e7dc440d91e8223f2c0924f489f54a06f64 --- /dev/null +++ b/paddle/framework/threadpool.cc @@ -0,0 +1,24 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/framework/threadpool.h" + +namespace paddle { +namespace framework { + +std::unique_ptr ThreadPool::threadpool(nullptr); +std::once_flag ThreadPool::init_flag; + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/threadpool.h b/paddle/framework/threadpool.h new file mode 100644 index 0000000000000000000000000000000000000000..3ac345851c38557f82698786dd3bc8e1202a4256 --- /dev/null +++ b/paddle/framework/threadpool.h @@ -0,0 +1,171 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "paddle/platform/enforce.h" + +namespace paddle { +namespace framework { + +class ThreadPool { + public: + typedef std::packaged_task Task; + + /** + * @brief Get a instance of threadpool, the thread number will + * be specified as the number of hardware thread contexts + */ + static ThreadPool* GetInstance() { + std::call_once(init_flag, &ThreadPool::Init); + return threadpool.get(); + } + + ~ThreadPool() { + { + // notify all threads to stop running + running_ = false; + scheduled_.notify_all(); + } + + for (auto& t : threads_) { + t->join(); + t.reset(nullptr); + } + } + + int GetNumThreads() const { return num_threads_; } + + int GetAvailable() { + std::unique_lock lock(mutex_); + return available_; + } + + /** + * @brief Push a function to the queue, and will be scheduled and + * executed if a thread is available. + * @param[in] Task, will be pushed to the task queue. + * @return std::future, we could wait for the task finished by + * f.wait(). + */ + template + std::future Run(Callback fn) { + std::unique_lock lock(mutex_); + Task task(std::bind(fn)); + std::future f = task.get_future(); + tasks_.push(std::move(task)); + lock.unlock(); + scheduled_.notify_one(); + return f; + } + + /** + * @brief Wait until all the tasks are completed. + */ + void Wait() { + std::unique_lock lock(mutex_); + completed_.wait(lock, [=] { return Done() == true; }); + } + + private: + DISABLE_COPY_AND_ASSIGN(ThreadPool); + + explicit ThreadPool(int num_threads) + : num_threads_(num_threads), available_(num_threads), running_(true) { + threads_.resize(num_threads); + for (auto& thread : threads_) { + // TODO(Yancey1989): binding the thread on the specify CPU number + thread.reset(new std::thread(std::bind(&ThreadPool::TaskLoop, this))); + } + } + + /** + * @brief If the task queue is empty and avaialbe + * is equal to the number of threads, means that + * all tasks are completed. + * + * Note: this function is not thread-safe. + * + * @return true if all tasks are completed. + */ + bool Done() { return tasks_.empty() && available_ == num_threads_; } + + void TaskLoop() { + while (running_) { + std::unique_lock lock(mutex_); + scheduled_.wait(lock, [=] { return !tasks_.empty() || !running_; }); + + if (!running_) { + break; + } + // pop a task from the task queue + auto task = std::move(tasks_.front()); + tasks_.pop(); + + --available_; + lock.unlock(); + + // run the task + task(); + + { + std::unique_lock lock(mutex_); + ++available_; + if (Done()) { + completed_.notify_all(); + } + } + } + } + + static void Init() { + if (threadpool.get() == nullptr) { + // TODO(Yancey1989): specify the max threads number + int num_threads = std::thread::hardware_concurrency(); + PADDLE_ENFORCE_GT(num_threads, 0); + threadpool.reset(new ThreadPool(num_threads)); + } + } + + private: + static std::unique_ptr threadpool; + static std::once_flag init_flag; + + int num_threads_; + int available_; + bool running_; + std::queue tasks_; + std::vector> threads_; + std::mutex mutex_; + std::condition_variable scheduled_; + std::condition_variable completed_; +}; + +// Run a function asynchronously. +// NOTE: The function must return void. If the function need to return a value, +// you can use lambda to capture a value pointer. +template +std::future Async(Callback callback) { + return ThreadPool::GetInstance()->Run(callback); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/threadpool_test.cc b/paddle/framework/threadpool_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..50b6238cd8786be9d8cf2d5f821daadea12bd208 --- /dev/null +++ b/paddle/framework/threadpool_test.cc @@ -0,0 +1,61 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include + +#include "threadpool.h" + +namespace framework = paddle::framework; + +void do_sum(framework::ThreadPool* pool, std::atomic& sum, int cnt) { + std::vector> fs; + for (int i = 0; i < cnt; ++i) { + auto f = pool->Run([&sum]() { sum.fetch_add(1); }); + fs.push_back(std::move(f)); + } + for (auto& f : fs) { + f.wait(); + } +} + +TEST(ThreadPool, ConcurrentInit) { + framework::ThreadPool* pool; + int n = 50; + std::vector threads; + for (int i = 0; i < n; ++i) { + std::thread t([&pool]() { pool = framework::ThreadPool::GetInstance(); }); + threads.push_back(std::move(t)); + } + for (auto& t : threads) { + t.join(); + } +} + +TEST(ThreadPool, ConcurrentRun) { + framework::ThreadPool* pool = framework::ThreadPool::GetInstance(); + std::atomic sum(0); + std::vector threads; + int n = 50; + // sum = (n * (n + 1)) / 2 + for (int i = 1; i <= n; ++i) { + std::thread t(do_sum, pool, std::ref(sum), i); + threads.push_back(std::move(t)); + } + for (auto& t : threads) { + t.join(); + } + pool->Wait(); + EXPECT_EQ(sum, ((n + 1) * n) / 2); +} diff --git a/paddle/framework/type_defs.h b/paddle/framework/type_defs.h index da152e8b9d23490e2e69c2dd215c45355e1c1e44..d834d343759fa279a1444c6337956ffce1b9061a 100644 --- a/paddle/framework/type_defs.h +++ b/paddle/framework/type_defs.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include diff --git a/paddle/framework/var_desc.cc b/paddle/framework/var_desc.cc index bd8973eeb369aabd2c52d4fccf799657c564ee78..aeab18d7214f8d9dd79bc3d2e0322490445b3b49 100644 --- a/paddle/framework/var_desc.cc +++ b/paddle/framework/var_desc.cc @@ -52,7 +52,7 @@ void VarDesc::SetLoDLevel(int32_t lod_level) { } } -int32_t VarDesc::GetLodLevel() const { +int32_t VarDesc::GetLoDLevel() const { switch (desc_.type()) { case proto::VarDesc::LOD_TENSOR: return desc_.lod_tensor().lod_level(); @@ -74,7 +74,7 @@ const proto::TensorDesc &VarDesc::tensor_desc() const { case proto::VarDesc::LOD_TENSOR_ARRAY: return desc_.tensor_array().tensor(); default: - PADDLE_THROW("Unexpected branch."); + PADDLE_THROW("The type of var '", this->Name(), "' is unsupported."); } } diff --git a/paddle/framework/var_desc.h b/paddle/framework/var_desc.h index 4fd2abe7fb215c3ac454de3e30754685111eb570..fc482c467404a6b9dfed64c43871d91d3d10c766 100644 --- a/paddle/framework/var_desc.h +++ b/paddle/framework/var_desc.h @@ -76,7 +76,7 @@ class VarDesc { void SetLoDLevel(int32_t lod_level); - int32_t GetLodLevel() const; + int32_t GetLoDLevel() const; proto::VarDesc::VarType GetType() const; diff --git a/paddle/framework/var_type.h b/paddle/framework/var_type.h index 43a72276408bdefc329e8ddcd901ba346aba35f3..5b7a08a08732a6ccbc206f6a4f0aa4788ce4a219 100644 --- a/paddle/framework/var_type.h +++ b/paddle/framework/var_type.h @@ -1,22 +1,24 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/framework.pb.h" #include "paddle/framework/lod_rank_table.h" #include "paddle/framework/lod_tensor.h" #include "paddle/framework/lod_tensor_array.h" +#include "paddle/framework/selected_rows.h" +#include "paddle/framework/variable.h" namespace paddle { namespace framework { @@ -35,7 +37,7 @@ inline proto::VarDesc::VarType ToVarType(std::type_index type) { } template -inline void VisitVarType(const Variable& var, Visitor visitor) { +inline void VisitVarType(const framework::Variable& var, Visitor visitor) { switch (ToVarType(var.Type())) { case proto::VarDesc_VarType_LOD_TENSOR: visitor(var.Get()); diff --git a/paddle/framework/var_type_inference.h b/paddle/framework/var_type_inference.h index 1a4dca05f741f33d58eeccda9d1f800aadb8c01f..6c11f2fee7f554fb008f559bb33aeafea5c5a556 100644 --- a/paddle/framework/var_type_inference.h +++ b/paddle/framework/var_type_inference.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/type_defs.h" diff --git a/paddle/framework/var_type_inference_test.cc b/paddle/framework/var_type_inference_test.cc index 92f333c558413ac3253c0fb8a20d6f0cfa33f99c..fa6018b1c583abaccb0259b82c9bb61c0fc10820 100644 --- a/paddle/framework/var_type_inference_test.cc +++ b/paddle/framework/var_type_inference_test.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/var_type_inference.h" #include "gtest/gtest.h" diff --git a/paddle/function/GemmConvOp.cpp b/paddle/function/GemmConvOp.cpp index de7b70e271b38ebe3a4c38704d0cced47d010788..cbdbf5335d32d55a0221728758025c9d2cb3e7d1 100644 --- a/paddle/function/GemmConvOp.cpp +++ b/paddle/function/GemmConvOp.cpp @@ -126,14 +126,165 @@ public: inputData += inputChannels * inputHeight * inputWidth; outputData += outputChannels * outputHeight * outputWidth; } + } +}; + #ifdef PADDLE_MOBILE_INFERENCE - if (Device == DEVICE_TYPE_CPU) { - memory_.reset(); + +/* + * \brief Forward calculation of convolution, optimized for mobile. + */ +template +class GemmConvMobileFunction : public ConvFunctionBase { +public: + void init(const FuncConfig& config) override { + ConvFunctionBase::init(config); + } + + void check(const BufferArgs& inputs, const BufferArgs& outputs) override { + const TensorShape& input = inputs[0].shape(); + const TensorShape& filter = inputs[1].shape(); + const TensorShape& output = outputs[0].shape(); + checkShape(input, filter, output); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(numInputs_, inputs.size()); + CHECK_EQ(numOutputs_, outputs.size()); + check(inputs, outputs); + // TODO(hedaoyuan): Need to define some index macros, + // to avoid useing 0 and 1. + const TensorShape& input = inputs[0].shape(); + const TensorShape& filter = inputs[1].shape(); + const TensorShape& output = outputs[0].shape(); + + real beta; + if (outputs[0].getArgType() == ADD_TO) { + beta = 1.0; + } else { + beta = 0.0; } -#endif + + size_t batchSize = input[0]; + size_t inputChannels = input[1]; + size_t inputHeight = input[2]; + size_t inputWidth = input[3]; + size_t filterHeight = getFilterHeight(filter); + size_t filterWidth = getFilterWidth(filter); + size_t outputChannels = output[1]; + size_t outputHeight = output[2]; + size_t outputWidth = output[3]; + + real* inputData = inputs[0].data(); + real* filterData = inputs[1].data(); + real* outputData = outputs[0].data(); + bool needIm2col = isNeedIm2col(filter); + + TensorShape imShape = + TensorShape({inputChannels / groups_, inputHeight, inputWidth}); + + TensorShape colShape; + real* colData = NULL; + + size_t colHeight = inputChannels / groups_ * filterHeight * filterWidth; + size_t colWidth = outputHeight * outputWidth; + // Max col matrix height 256, Max col matrix width 1024 + size_t stepColHeight = std::min(colHeight, static_cast(256)); + size_t stepColWidth = std::min(colWidth, static_cast(2048)); + + if (needIm2col) { + colShape = TensorShape({inputChannels / groups_, + filterHeight, + filterWidth, + outputHeight, + outputWidth}); + + resizeBuffer(stepColHeight * stepColWidth * sizeof(real)); + colData = reinterpret_cast(memory_->getBuf()); + } + + Im2ColMobileFunctor im2col; + size_t inputOffset = imShape.getElements(); + size_t outputOffset = + (outputChannels / groups_) * outputHeight * outputWidth; + size_t filterOffset = filter.getElements() / groups_; + + int nStride = colWidth; + int kStride = colHeight; + for (size_t i = 0; i < batchSize; i++) { + for (size_t g = 0; g < groups_; g++) { + if (needIm2col) { + real beta_ = beta; + for (size_t colHeightStart = 0; colHeightStart < colHeight; + colHeightStart += stepColHeight) { + for (size_t colWidthStart = 0; colWidthStart < colWidth; + colWidthStart += stepColWidth) { + int N = std::min(colWidth - colWidthStart, stepColWidth); + int K = std::min(colHeight - colHeightStart, stepColHeight); + // im2col + im2col(inputData + g * inputOffset, + imShape, + colData, + colShape, + strideH(), + strideW(), + paddingH(), + paddingW(), + dilationH(), + dilationW(), + colHeightStart, + K, + colWidthStart, + N); + + // gemm + int M = outputChannels / groups_; + BlasGemm::compute( + false, + false, + M, + N, + K, + 1.0f, + filterData + g * filterOffset + colHeightStart, + kStride, + colData, + N, + beta_, + outputData + g * outputOffset + colWidthStart, + nStride); + } + beta_ = 1.0; + } + } else { + int M = outputChannels / groups_; + int N = outputHeight * outputWidth; + int K = inputChannels / groups_ * filterHeight * filterWidth; + BlasGemm::compute(false, + false, + M, + N, + K, + 1.0f, + filterData + g * filterOffset, + K, + inputData + g * inputOffset, + N, + beta, + outputData + g * outputOffset, + N); + } + } + inputData += inputChannels * inputHeight * inputWidth; + outputData += outputChannels * outputHeight * outputWidth; + } + + memory_.reset(); } }; +#endif + /* * \brief Backward input calculation of convolution. */ @@ -348,7 +499,11 @@ public: } }; +#ifdef PADDLE_MOBILE_INFERENCE +REGISTER_TYPED_FUNC(GemmConv, CPU, GemmConvMobileFunction); +#else REGISTER_TYPED_FUNC(GemmConv, CPU, GemmConvFunction); +#endif REGISTER_TYPED_FUNC(GemmConvGradInput, CPU, GemmConvGradInputFunction); REGISTER_TYPED_FUNC(GemmConvGradFilter, CPU, GemmConvGradFilterFunction); #ifdef PADDLE_WITH_CUDA diff --git a/paddle/function/Im2Col.h b/paddle/function/Im2Col.h index 0c37fc972484bfbede01d23652e384071bf883af..36a9bcf84e4b14965c83627821b71d1c7c0da1b2 100644 --- a/paddle/function/Im2Col.h +++ b/paddle/function/Im2Col.h @@ -98,4 +98,54 @@ public: int dilationWidth = 1); }; +template +class Im2ColMobileFunctor { +public: + void operator()(const T* imData, + const TensorShape& imShape, + T* colData, + const TensorShape& colShape, + int strideHeight, + int strideWidth, + int paddingHeight, + int paddingWidth, + int dilationHeight, + int dilationWidth, + int colHeightStart, + int colHeightSize, + int colWidthStart, + int colWidthSize) { + int inputHeight = imShape[1]; + int inputWidth = imShape[2]; + int filterHeight = colShape[1]; + int filterWidth = colShape[2]; + int outputWidth = colShape[4]; + + for (int colh = 0; colh < colHeightSize; colh++) { + int wOffset = (colHeightStart + colh) % filterWidth; + int hOffset = ((colHeightStart + colh) / filterWidth) % filterHeight; + int c_im = (colHeightStart + colh) / filterWidth / filterHeight; + + for (int colw = 0; colw < colWidthSize; colw++) { + int h = (colWidthStart + colw) / outputWidth; + int w = (colWidthStart + colw) % outputWidth; + + int imRowIdx = h * strideHeight + hOffset * dilationHeight; + int imColIdx = w * strideWidth + wOffset * dilationWidth; + if ((imRowIdx - paddingHeight) < 0 || + (imRowIdx - paddingHeight) >= inputHeight || + (imColIdx - paddingWidth) < 0 || + (imColIdx - paddingWidth) >= inputWidth) { + colData[colh * colWidthSize + colw] = static_cast(0); + } else { + imRowIdx += c_im * inputHeight - paddingHeight; + imColIdx -= paddingWidth; + colData[colh * colWidthSize + colw] = + imData[imRowIdx * inputWidth + imColIdx]; + } + } + } + } +}; + } // namespace paddle diff --git a/paddle/function/Im2ColTest.cpp b/paddle/function/Im2ColTest.cpp index 1f085538d81904dbd5b5d6bcd014adaed22e37d7..3ba866dcdd845403d52f7a85adfef08cbb11c305 100644 --- a/paddle/function/Im2ColTest.cpp +++ b/paddle/function/Im2ColTest.cpp @@ -138,4 +138,86 @@ TEST(Im2ColFunctor, GPU) { TestIm2ColFunctor(); } #endif +template +void TestIm2ColMobileFunctor() { + for (size_t channels : {32}) { + for (size_t inputHeight : {33, 100}) { + for (size_t inputWidth : {32, 96}) { + for (size_t filterHeight : {5}) { + for (size_t filterWidth : {7}) { + for (size_t stride : {2}) { + for (size_t padding : {1}) { + for (size_t dilation : {1, 3}) { + size_t filterSizeH = (filterHeight - 1) * dilation + 1; + size_t filterSizeW = (filterWidth - 1) * dilation + 1; + if (inputHeight + 2 * padding < filterSizeH || + inputWidth + 2 * padding < filterSizeW) + break; + if (padding >= filterSizeH || padding >= filterSizeW) break; + size_t outputHeight = + (inputHeight - filterSizeH + 2 * padding) / stride + 1; + size_t outputWidth = + (inputWidth - filterSizeW + 2 * padding) / stride + 1; + + TensorShape imShape = + TensorShape({channels, inputHeight, inputWidth}); + TensorShape colShape1 = TensorShape({channels, + filterHeight, + filterWidth, + outputHeight, + outputWidth}); + + size_t height = channels * filterHeight * filterWidth; + size_t width = outputHeight * outputWidth; + VectorPtr input1 = + Vector::create(imShape.getElements(), false); + VectorPtr input2 = + Vector::create(imShape.getElements(), false); + MatrixPtr output1 = + Matrix::create(height, width, false, false); + MatrixPtr output2 = + Matrix::create(height, width, false, false); + input1->uniform(0.001, 1); + input2->copyFrom(*input1); + + Im2ColFunctor im2Col1; + Im2ColMobileFunctor im2Col2; + im2Col1(input1->getData(), + imShape, + output1->getData(), + colShape1, + stride, + stride, + padding, + padding, + dilation, + dilation); + im2Col2(input2->getData(), + imShape, + output2->getData(), + colShape1, + stride, + stride, + padding, + padding, + dilation, + dilation, + 0, + height, + 0, + width); + + autotest::TensorCheckEqual(*output1, *output2); + } + } + } + } + } + } + } + } +} + +TEST(Im2ColFunctor, Mobile) { TestIm2ColMobileFunctor(); } + } // namespace paddle diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt index 41ead3c5ecef248830cfb0f8be360f21dcd58e7b..3d6ced713f00bd72622d8aeed3967642b6774ffe 100644 --- a/paddle/gserver/CMakeLists.txt +++ b/paddle/gserver/CMakeLists.txt @@ -34,6 +34,16 @@ else() message(STATUS "Compile with MKLDNNLayers and MKLDNNActivations") endif() +if(NOT WITH_MKLML) + file(GLOB_RECURSE MKL_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLPacked*.h") + file(GLOB_RECURSE MKL_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLPacked*.cpp") + list(REMOVE_ITEM GSERVER_HEADER ${MKL_HEADER}) + list(REMOVE_ITEM GSERVER_SOURCES ${MKL_SOURCES}) + message(STATUS "Skip compiling with MKLPackedLayers") +else() + message(STATUS "Compile with MKLPackedLayers") +endif() + if(NOT WITH_GPU) list(REMOVE_ITEM GSERVER_HEADER layers/CudnnConvBaseLayer.h diff --git a/paddle/gserver/layers/MKLDNNLRNLayer.cpp b/paddle/gserver/layers/MKLDNNLRNLayer.cpp index 741984bb68d3881f6ac26eaca7790190ed6e572a..ac217f1363dbd0360645bbe07cd71a17cc931a79 100644 --- a/paddle/gserver/layers/MKLDNNLRNLayer.cpp +++ b/paddle/gserver/layers/MKLDNNLRNLayer.cpp @@ -29,7 +29,7 @@ bool MKLDNNLRNLayer::init(const LayerMap& layerMap, } /* the size of inputs for norm-layer is 1 */ - CHECK_EQ(config_.inputs_size(), 1UL); + CHECK_EQ(config_.inputs_size(), 1); const NormConfig& conf = config_.inputs(0).norm_conf(); localSize_ = conf.size(); alpha_ = conf.scale(); diff --git a/paddle/gserver/layers/MKLPackedRecurrentLayer.cpp b/paddle/gserver/layers/MKLPackedRecurrentLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dd75555fae134664d92ba9f8ffdea8af78166b7e --- /dev/null +++ b/paddle/gserver/layers/MKLPackedRecurrentLayer.cpp @@ -0,0 +1,132 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MKLPackedRecurrentLayer.h" + +namespace paddle { + +REGISTER_LAYER(mkl_packed_recurrent, MKLPackedRecurrentLayer); + +bool MKLPackedRecurrentLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + if (!RecurrentLayer::init(layerMap, parameterMap)) return false; + packed_weight_.reset(new MKLPackedWeight(weight_->getW())); + packed_weight_->pack(); + if (needGradient_) { + packed_weightT_.reset(new MKLPackedWeight(weight_->getW(), true)); + packed_weightT_->pack(); + } + return true; +} + +void MKLPackedRecurrentLayer::backward(const UpdateCallback& callback) { + RecurrentLayer::backward(callback); + packed_weight_->pack(); + if (needGradient_) { + packed_weightT_->pack(); + } +} + +void MKLPackedRecurrentLayer::forwardBatch(int batchSize, + size_t numSequences, + const int* starts) { + if (!batchValue_) { + batchValue_.reset(new SequenceToBatch(useGpu_)); + } + + batchValue_->resizeOrCreateBatch(batchSize, numSequences, starts, reversed_); + + batchValue_->copyFromSeq(*output_.value); + + { + REGISTER_TIMER_INFO("RecurrentFwBatch", getName().c_str()); + /* forward one batch */ + for (size_t n = 0; n < batchValue_->getNumBatch(); n++) { + MatrixPtr batchValue = batchValue_->getBatchValue(n); + + if (n != 0) { + MatrixPtr preBatchValue = + batchValue_->getBatchValue(n - 1, batchValue->getHeight()); + + packed_weight_->gemm_compute(preBatchValue, batchValue); + } + Argument arg; + arg.value = batchValue; + activation_->forward(arg).check(); + } + } + batchValue_->copyBackSeq(*output_.value); +} + +void MKLPackedRecurrentLayer::backwardBatch(int batchSize, + size_t numSequences, + const int* starts) { + if (!batchGrad_) { + batchGrad_.reset(new SequenceToBatch(useGpu_)); + } + batchGrad_->shareIndexWith(*batchValue_); + + size_t numBatch = batchGrad_->getNumBatch(); + bool backwardByBatch = numBatch < numSequences; + + batchGrad_->copyFromSeq(*output_.grad); + { + REGISTER_TIMER_INFO("RecurrentBwData", getName().c_str()); + /* backward one batch */ + for (int n = (int)numBatch - 1; n >= 0; n--) { + MatrixPtr batchGrad = batchGrad_->getBatchValue(n); + MatrixPtr batchValue = + batchValue_->getBatchValue(n, batchGrad->getHeight()); + + Argument arg; + arg.value = batchValue; + arg.grad = batchGrad; + activation_->backward(arg).check(); + + if (n != 0) { + batchValue = batchGrad_->getBatchValue(n - 1, batchGrad->getHeight()); + packed_weightT_->gemm_compute(batchGrad, batchValue); + } + + if (backwardByBatch && weight_->getWGrad()) { + if (n != 0) { + /* backward weight */ + batchValue = + batchValue_->getBatchValue(n - 1, batchGrad->getHeight()); + weight_->getWGrad()->mul( + *batchValue->getTranspose(), *batchGrad, 1, 1); + } + } + } + } + + batchGrad_->copyBackSeq(*output_.grad); + + if (!backwardByBatch && weight_->getWGrad()) { + REGISTER_TIMER_INFO("RecurrentBwWeight", getName().c_str()); + for (size_t seq = 0; seq < numSequences; ++seq) { + int len = starts[seq + 1] - starts[seq]; + weight_->getWGrad()->mul( + *output_.value + ->subMatrix(reversed_ ? starts[seq] + 1 : starts[seq], len - 1) + ->getTranspose(), + *output_.grad->subMatrix(reversed_ ? starts[seq] : starts[seq] + 1, + len - 1), + 1, + 1); + } + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLPackedRecurrentLayer.h b/paddle/gserver/layers/MKLPackedRecurrentLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..bded523a8fbd6ff18f28859bd2a1bf3c1a25e2a0 --- /dev/null +++ b/paddle/gserver/layers/MKLPackedRecurrentLayer.h @@ -0,0 +1,58 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "MKLPackedWeight.h" +#include "RecurrentLayer.h" + +DECLARE_bool(rnn_use_batch); + +namespace paddle { + +/** + * @brief MKLPackedRecurrentLayer is almost the same with RecurrentLayer + * but is optimized with MKL cblas packed gemm. + * More details: + * https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/mkl/mkl_packed.md + */ + +class MKLPackedRecurrentLayer : public RecurrentLayer { +public: + explicit MKLPackedRecurrentLayer(const LayerConfig& config) + : RecurrentLayer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void backward(const UpdateCallback& callback) override; + +protected: + void forwardBatch(int batchSize, + size_t numSequences, + const int* starts) override; + + void backwardBatch(int batchSize, + size_t numSequences, + const int* starts) override; + +protected: + /// packed_weight_ contains same data with + /// RecurrentLayer::weight_ but is packed + std::unique_ptr packed_weight_; + /// packed_weightT_ is the transposition matrix of packed_weight_ + std::unique_ptr packed_weightT_; +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLPackedWeight.h b/paddle/gserver/layers/MKLPackedWeight.h new file mode 100644 index 0000000000000000000000000000000000000000..15d5093beb43e2f086601c2616ace033da34f341 --- /dev/null +++ b/paddle/gserver/layers/MKLPackedWeight.h @@ -0,0 +1,86 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/math/MathFunctions.h" +#include "paddle/parameter/Parameter.h" +#include "paddle/parameter/Weight.h" + +namespace paddle { + +class MKLPackedWeight { +protected: + /// The pointer of weight + real *weight_; + /// The pointer of cblas packed gemm to weight + real *packedWeight_; + size_t height_; + size_t width_; + bool transW_; + +public: + explicit MKLPackedWeight(MatrixPtr weight, bool transW = false) { + packedWeight_ = nullptr; + weight_ = weight->getData(); + height_ = weight->getHeight(); + width_ = weight->getWidth(); + transW_ = transW; + } + + ~MKLPackedWeight() { free_(); } + + void pack() { pack_(weight_); } + + void gemm_compute(const MatrixPtr src, MatrixPtr dst) { + cblas_sgemm_compute(CblasRowMajor, + CblasNoTrans, + CblasPacked, + src->getHeight(), + transW_ ? height_ : width_, + transW_ ? width_ : height_, + src->getData(), + src->getWidth(), + packedWeight_, + width_, + 1.0, + dst->getData(), + dst->getWidth()); + } + +protected: + void pack_(real *src) { + if (!packedWeight_) { + packedWeight_ = cblas_sgemm_alloc(CblasBMatrix, 1, width_, height_); + } + cblas_sgemm_pack(CblasRowMajor, + CblasBMatrix, + transW_ ? CblasTrans : CblasNoTrans, + 1, + transW_ ? height_ : width_, + transW_ ? width_ : height_, + 1.0, + src, + width_, + packedWeight_); + } + + void free_() { + if (packedWeight_) { + cblas_sgemm_free(packedWeight_); + } + } +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/RecurrentLayer.cpp b/paddle/gserver/layers/RecurrentLayer.cpp index e4c2b483d2fa4032735858dab17647592791a9c7..6bd42c06cadf755e8703f3fc299d0e6248dd1478 100644 --- a/paddle/gserver/layers/RecurrentLayer.cpp +++ b/paddle/gserver/layers/RecurrentLayer.cpp @@ -12,119 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include -#include "Layer.h" -#include "SequenceToBatch.h" -#include "paddle/utils/Stat.h" +#include "RecurrentLayer.h" DEFINE_bool(rnn_use_batch, false, "Using the batch method for calculation."); namespace paddle { -/** - * @brief RecurrentLayer takes 1 input layer. The output size is the same with - * input layer. - * For each sequence [start, end] it performs the following computation: - * \f[ - * out_{i} = act(in_{i}) \ \ \text{for} \ i = start \\ - * out_{i} = act(in_{i} + out_{i-1} * W) \ \ \text{for} \ start < i <= end - * - * \f] - * If reversed is true, the order is reversed: - * \f[ - * out_{i} = act(in_{i}) \ \ \text{for} \ i = end \\ - * out_{i} = act(in_{i} + out_{i+1} * W) \ \ \text{for} \ start <= i < end - * \f] - * There are two methods to calculate rnn. One way is to compute rnn one - * sequence by one sequence. The other way is to reorganize the input - * into batches, then compute rnn one batch by one batch. Users can select - * them by rnn_use_batch flag. - */ - -class RecurrentLayer : public Layer { -public: - explicit RecurrentLayer(const LayerConfig& config) : Layer(config) {} - - bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - - void backward(const UpdateCallback& callback) override; - - void resetState() override; - - void setState(LayerStatePtr state) override; - - LayerStatePtr getState() override; - -protected: - /** - * @brief If user do not set --rnn_use_batch=true, it will - * compute rnn forward one sequence by one sequence in default. - * @param batchSize Total words number of all samples in this batch. - * @param numSequences The sample number. - * @param starts Each start position of each samples. - */ - void forwardSequence(int batchSize, size_t numSequences, const int* starts); - /** - * @brief Compute rnn forward by one sequence. - * @param start The start position of this sequence (or sample). - * @param length The length of this sequence (or sample), namely the words - * number of this sequence. - */ - void forwardOneSequence(int start, int length); - /** - * @brief Compute rnn backward one sequence by onesequence. - * @param batchSize Total words number of all samples in this batch. - * @param numSequences The sample number. - * @param starts Each start position of each samples. - */ - void backwardSequence(int batchSize, size_t numSequences, const int* starts); - /** - * @brief Compute rnn backward by one sequence. - * @param start The start position of this sequence (or sample). - * @param length The length of this sequence (or sample), namely the words - * number of this sequence. - */ - void backwardOneSequence(int start, int length); - - /** - * @brief Reorganize input into batches and compute rnn forward batch - * by batch. It will convert batch shape to sequence after finishing forward. - * The batch info can refer to SequenceToBatch class. - * @param batchSize Total words number of all samples in this batch. - * @param numSequences The sample number. - * @param starts Each start position of each samples. - */ - void forwardBatch(int batchSize, size_t numSequences, const int* starts); - - /** - * @brief Reorganize input into batches and compute rnn forward batch - * by batch. - * @param batchSize Total words number of all samples in this batch. - * @param numSequences The sample number. - * @param starts Each start position of each samples. - */ - void backwardBatch(int batchSize, size_t numSequences, const int* starts); - -protected: - std::unique_ptr weight_; - std::unique_ptr bias_; - - /// frameOutput_[i] is used to hold the i-th sample of output_ - std::vector frameOutput_; - MatrixPtr prevOutput_; - /// Whether compute rnn by reverse. - bool reversed_; - /// If compute batch by batch, batchValue_ will be used to save the - /// reorganized input value. - std::unique_ptr batchValue_; - /// If compute batch by batch, batchGrad_ will be used to save the - /// gradient with respect to reorganized input value. - std::unique_ptr batchGrad_; -}; - REGISTER_LAYER(recurrent, RecurrentLayer); bool RecurrentLayer::init(const LayerMap& layerMap, @@ -260,7 +153,6 @@ void RecurrentLayer::backward(const UpdateCallback& callback) { bias_->getWGrad()->collectBias(*output_.grad, 1); bias_->getParameterPtr()->incUpdate(callback); } - weight_->getParameterPtr()->incUpdate(callback); } diff --git a/paddle/gserver/layers/RecurrentLayer.h b/paddle/gserver/layers/RecurrentLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..f40dbe150fa93becfc26f6ea9e55e40eaf208860 --- /dev/null +++ b/paddle/gserver/layers/RecurrentLayer.h @@ -0,0 +1,130 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once +#include +#include "Layer.h" +#include "SequenceToBatch.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +/** + * @brief RecurrentLayer takes 1 input layer. The output size is the same with + * input layer. + * For each sequence [start, end] it performs the following computation: + * \f[ + * out_{i} = act(in_{i}) \ \ \text{for} \ i = start \\ + * out_{i} = act(in_{i} + out_{i-1} * W) \ \ \text{for} \ start < i <= end + * + * \f] + * If reversed is true, the order is reversed: + * \f[ + * out_{i} = act(in_{i}) \ \ \text{for} \ i = end \\ + * out_{i} = act(in_{i} + out_{i+1} * W) \ \ \text{for} \ start <= i < end + * \f] + * There are two methods to calculate rnn. One way is to compute rnn one + * sequence by one sequence. The other way is to reorganize the input + * into batches, then compute rnn one batch by one batch. Users can select + * them by rnn_use_batch flag. + */ + +class RecurrentLayer : public Layer { +public: + explicit RecurrentLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + + void backward(const UpdateCallback& callback) override; + + void resetState() override; + + void setState(LayerStatePtr state) override; + + LayerStatePtr getState() override; + +protected: + /** + * @brief If user do not set --rnn_use_batch=true, it will + * compute rnn forward one sequence by one sequence in default. + * @param batchSize Total words number of all samples in this batch. + * @param numSequences The sample number. + * @param starts Each start position of each samples. + */ + void forwardSequence(int batchSize, size_t numSequences, const int* starts); + /** + * @brief Compute rnn forward by one sequence. + * @param start The start position of this sequence (or sample). + * @param length The length of this sequence (or sample), namely the words + * number of this sequence. + */ + void forwardOneSequence(int start, int length); + /** + * @brief Compute rnn backward one sequence by onesequence. + * @param batchSize Total words number of all samples in this batch. + * @param numSequences The sample number. + * @param starts Each start position of each samples. + */ + void backwardSequence(int batchSize, size_t numSequences, const int* starts); + /** + * @brief Compute rnn backward by one sequence. + * @param start The start position of this sequence (or sample). + * @param length The length of this sequence (or sample), namely the words + * number of this sequence. + */ + void backwardOneSequence(int start, int length); + + /** + * @brief Reorganize input into batches and compute rnn forward batch + * by batch. It will convert batch shape to sequence after finishing forward. + * The batch info can refer to SequenceToBatch class. + * @param batchSize Total words number of all samples in this batch. + * @param numSequences The sample number. + * @param starts Each start position of each samples. + */ + virtual void forwardBatch(int batchSize, + size_t numSequences, + const int* starts); + + /** + * @brief Reorganize input into batches and compute rnn forward batch + * by batch. + * @param batchSize Total words number of all samples in this batch. + * @param numSequences The sample number. + * @param starts Each start position of each samples. + */ + virtual void backwardBatch(int batchSize, + size_t numSequences, + const int* starts); + +protected: + std::unique_ptr weight_; + std::unique_ptr bias_; + + /// frameOutput_[i] is used to hold the i-th sample of output_ + std::vector frameOutput_; + MatrixPtr prevOutput_; + /// Whether compute rnn by reverse. + bool reversed_; + /// If compute batch by batch, batchValue_ will be used to save the + /// reorganized input value. + std::unique_ptr batchValue_; + /// If compute batch by batch, batchGrad_ will be used to save the + /// gradient with respect to reorganized input value. + std::unique_ptr batchGrad_; +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index a2f07937b8834e3f3fa7a6bf2ae10f29a8d84f29..ba83667ebc9a89c37f77a7f71e6df90b54723cc0 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1472,7 +1472,8 @@ TEST(Layer, RecurrentLayer) { for (auto reversed : {false, true}) { config.layerConfig.set_reversed(reversed); config.testState = !reversed; - testLayerGrad(config, "recurrent", 50, /* trans= */ false, useGpu); + testLayerGrad( + config, "recurrent", 50, /* trans= */ false, useGpu, false, 1.0); } } } @@ -1494,7 +1495,8 @@ TEST(Layer, LstmLayer) { for (auto reversed : {false, true}) { config.layerConfig.set_reversed(reversed); config.testState = !reversed; - testLayerGrad(config, "lstmemory", 100, /* trans= */ false, useGpu); + testLayerGrad( + config, "lstmemory", 100, /* trans= */ false, useGpu, false, 0.02); } } for (auto useGpu : {true}) { diff --git a/paddle/gserver/tests/test_RecurrentLayer.cpp b/paddle/gserver/tests/test_RecurrentLayer.cpp index 16ab0e6aecb6a895b20389992a44dc542eb3b00a..0e130843339a1030f86f4d48891499bac20e9ca2 100644 --- a/paddle/gserver/tests/test_RecurrentLayer.cpp +++ b/paddle/gserver/tests/test_RecurrentLayer.cpp @@ -222,6 +222,7 @@ TEST(Layer, RecurrentLayer) { #define protected public #include "paddle/gserver/layers/GatedRecurrentLayer.h" #include "paddle/gserver/layers/LstmLayer.h" +#include "paddle/gserver/layers/RecurrentLayer.h" template class TestRecurrentLayer { public: @@ -420,12 +421,151 @@ TEST(Layer, LstmLayer) { } } +#ifdef PADDLE_WITH_MKLML + +#include "paddle/gserver/layers/MKLPackedRecurrentLayer.h" + +LayerPtr initMKLPackedLayer(LayerConfig layerConfig, + bool reversed, + int layerSize, + LayerPtr dataLayer, + ParameterPtr para, + ParameterPtr bias = nullptr) { + LayerMap layerMap; + ParameterMap parameterMap; + layerMap[dataLayer->getName()] = dataLayer; + parameterMap[para->getName()] = para; + if (bias) { + parameterMap[bias->getName()] = bias; + layerConfig.set_bias_parameter_name("bias_0"); + } + + layerConfig.set_size(layerSize); + layerConfig.set_reversed(reversed); + layerConfig.add_inputs(); + LayerInputConfig& input = *(layerConfig.mutable_inputs(0)); + input.set_input_layer_name("layer_0"); + input.set_input_parameter_name("para_0"); + + LayerPtr testLayer = Layer::create(layerConfig); + layerMap[testLayer->getName()] = testLayer; + + testLayer->init(layerMap, parameterMap); + testLayer->setNeedGradient(true); + + return testLayer; +} + +void checkMKLPackedLayer(LayerConfig layerConfig1, + LayerConfig layerConfig2, + bool reversed, + int layerSize, + int batchSize, + bool useBatch1, + bool useBatch2) { + LayerPtr dataLayer; + ParameterPtr para, bias; + + if (layerConfig1.type() == "recurrent") { + dataLayer = creatDataLayer("layer_0", batchSize, layerSize, false); + para = creatParameter("para_0", 0, layerSize * layerSize, false); + bias = nullptr; + } else if (layerConfig1.type() == "gated_recurrent") { + dataLayer = creatDataLayer("layer_0", batchSize, layerSize * 3, false); + para = creatParameter("para_0", 0, layerSize * layerSize * 3, false); + bias = creatParameterBias("bias_0", 1, layerSize * 3, false); + } + + LayerPtr testLayer1 = initMKLPackedLayer( + layerConfig1, reversed, layerSize, dataLayer, para, bias); + LayerPtr testLayer2 = initMKLPackedLayer( + layerConfig2, reversed, layerSize, dataLayer, para, bias); + + const VectorPtr& weightGrad = + (testLayer1->getParameters()[0])->getBuf(PARAMETER_GRADIENT); + const MatrixPtr& inputGrad = testLayer1->getPrev(0)->getOutputGrad(); + CpuVector wgt_grad1(weightGrad->getSize()); + CpuVector wgt_grad2(weightGrad->getSize()); + CpuMatrix input_grad1(inputGrad->getHeight(), inputGrad->getWidth()); + CpuMatrix input_grad2(inputGrad->getHeight(), inputGrad->getWidth()); + + for (int i = 0; i < 2; i++) { + FLAGS_rnn_use_batch = useBatch1; + + testLayer1->forward(PASS_GC); + + FLAGS_rnn_use_batch = useBatch2; + testLayer2->forward(PASS_GC); + + testLayer1->getOutputGrad()->randomizeUniform(); + testLayer2->getOutputGrad()->copyFrom(*testLayer1->getOutputGrad()); + + weightGrad->zero(); + inputGrad->zero(); + FLAGS_rnn_use_batch = useBatch1; + testLayer1->backward(nullptr); + + wgt_grad1.copyFrom(*weightGrad); + input_grad1.copyFrom(*inputGrad); + + weightGrad->zero(); + inputGrad->zero(); + FLAGS_rnn_use_batch = useBatch2; + testLayer2->backward(nullptr); + + wgt_grad2.copyFrom(*weightGrad); + input_grad2.copyFrom(*inputGrad); + + checkError(*testLayer1->getOutputValue(), *testLayer2->getOutputValue()); + checkError(wgt_grad1, wgt_grad2); + checkError(input_grad1, input_grad2); + } +} + +TEST(MKLPackedLayer, RecurrentLayer) { + LayerConfig layerConfig1; + LayerConfig layerConfig2; + + layerConfig1.set_name("paddle-rnn"); + layerConfig1.set_type("recurrent"); + layerConfig1.set_active_type("relu"); + + layerConfig2.set_name("mkl-packed-rnn"); + layerConfig2.set_type("mkl_packed_recurrent"); + layerConfig2.set_active_type("relu"); + + FLAGS_use_gpu = false; + + for (auto layerSize : {32, 64, 128, 256, 512}) { + for (auto batchSize : {1, 5, 100, 500}) { + for (auto reversed : {true, false}) { + for (auto paddle_use_batch : {true, false}) { + for (auto MKLPacked_use_batch : {true, false}) { + LOG(INFO) << " layerSize=" << layerSize + << " batchSize=" << batchSize << " reversed=" << reversed + << " paddle_use_batch=" << paddle_use_batch + << " MKLPacked_use_batch=" << MKLPacked_use_batch; + + checkMKLPackedLayer(layerConfig1, + layerConfig2, + reversed, + layerSize, + batchSize, + paddle_use_batch, + MKLPacked_use_batch); + } + } + } + } + } +} +#endif + int main(int argc, char** argv) { - if (version::isWithGpu()) { - testing::InitGoogleTest(&argc, argv); - initMain(argc, argv); - return RUN_ALL_TESTS(); - } else { - return 0; + testing::InitGoogleTest(&argc, argv); + initMain(argc, argv); + if (!version::isWithGpu()) { + testing::GTEST_FLAG(filter) = "-Layer.*"; } + return RUN_ALL_TESTS(); } diff --git a/paddle/inference/CMakeLists.txt b/paddle/inference/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..8437b2b21942ead544dab8636db1b355b7cf7bd5 --- /dev/null +++ b/paddle/inference/CMakeLists.txt @@ -0,0 +1,47 @@ +set(FLUID_CORE_MODULES + backward proto_desc paddle_memory executor prune init ${GLOB_OP_LIB}) + +cc_library(paddle_fluid_api + SRCS inference.cc + DEPS ${FLUID_CORE_MODULES}) + +# Merge all modules into a simgle static library +cc_library(paddle_fluid DEPS paddle_fluid_api ${FLUID_CORE_MODULES}) + +# ptools +# just for testing, we may need to change the storing format for inference_model +# and move the dependent of pickle. +# download from http://www.picklingtools.com/ +# build in the C++ sub-directory, using command +# make -f Makefile.Linux libptools.so +set(PTOOLS_LIB) +set(PTOOLS_ROOT $ENV{PTOOLS_ROOT} CACHE PATH "Folder contains PicklingTools") +find_path(PTOOLS_INC_DIR chooseser.h PATHS ${PTOOLS_ROOT}/C++) +find_library(PTOOLS_SHARED_LIB NAMES ptools PATHS ${PTOOLS_ROOT}/C++) +if(PTOOLS_INC_DIR AND PTOOLS_SHARED_LIB) + add_definitions(-DPADDLE_USE_PTOOLS) + set(PTOOLS_LIB ptools) + message(STATUS "Found PicklingTools: ${PTOOLS_SHARED_LIB}") + add_library(${PTOOLS_LIB} SHARED IMPORTED GLOBAL) + set_property(TARGET ${PTOOLS_LIB} PROPERTY IMPORTED_LOCATION ${PTOOLS_SHARED_LIB}) + include_directories(${PTOOLS_ROOT}/C++) + include_directories(${PTOOLS_ROOT}/C++/opencontainers_1_8_5/include) + add_definitions(-DOC_NEW_STYLE_INCLUDES) # used in ptools +endif() + +add_executable(example example.cc) +if(APPLE) + set(OPTIONAL_LINK_FLAGS) + if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") + set(OPTIONAL_LINK_FLAGS "-undefined dynamic_lookup") + endif() + target_link_libraries(example + -Wl,-force_load paddle_fluid + ${OPTIONAL_LINK_FLAGS} + ${PTOOLS_LIB}) +else() + target_link_libraries(example + -Wl,--start-group -Wl,--whole-archive paddle_fluid + -Wl,--no-whole-archive -Wl,--end-group + ${PTOOLS_LIB}) +endif() diff --git a/paddle/inference/example.cc b/paddle/inference/example.cc new file mode 100644 index 0000000000000000000000000000000000000000..9711b20e6fb4099a2cc497029468ebd1fd0b3456 --- /dev/null +++ b/paddle/inference/example.cc @@ -0,0 +1,79 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include "gflags/gflags.h" +#include "paddle/inference/inference.h" + +DEFINE_string(dirname, "", "Directory of the inference model."); +DEFINE_string(feed_var_names, "", "Names of feeding variables"); +DEFINE_string(fetch_var_names, "", "Names of fetching variables"); + +int main(int argc, char** argv) { + google::ParseCommandLineFlags(&argc, &argv, true); + if (FLAGS_dirname.empty() || FLAGS_feed_var_names.empty() || + FLAGS_fetch_var_names.empty()) { + // Example: + // ./example --dirname=recognize_digits_mlp.inference.model + // --feed_var_names="x" + // --fetch_var_names="fc_2.tmp_2" + std::cout << "Usage: ./example --dirname=path/to/your/model " + "--feed_var_names=x --fetch_var_names=y" + << std::endl; + exit(1); + } + + std::cout << "FLAGS_dirname: " << FLAGS_dirname << std::endl; + std::cout << "FLAGS_feed_var_names: " << FLAGS_feed_var_names << std::endl; + std::cout << "FLAGS_fetch_var_names: " << FLAGS_fetch_var_names << std::endl; + + std::string dirname = FLAGS_dirname; + std::vector feed_var_names = {FLAGS_feed_var_names}; + std::vector fetch_var_names = {FLAGS_fetch_var_names}; + + paddle::InferenceEngine* engine = new paddle::InferenceEngine(); + engine->LoadInferenceModel(dirname, feed_var_names, fetch_var_names); + + paddle::framework::LoDTensor input; + srand(time(0)); + float* input_ptr = + input.mutable_data({1, 784}, paddle::platform::CPUPlace()); + for (int i = 0; i < 784; ++i) { + input_ptr[i] = rand() / (static_cast(RAND_MAX)); + } + + std::vector feeds; + feeds.push_back(input); + std::vector fetchs; + engine->Execute(feeds, fetchs); + + for (size_t i = 0; i < fetchs.size(); ++i) { + auto dims_i = fetchs[i].dims(); + std::cout << "dims_i:"; + for (int j = 0; j < dims_i.size(); ++j) { + std::cout << " " << dims_i[j]; + } + std::cout << std::endl; + std::cout << "result:"; + float* output_ptr = fetchs[i].data(); + for (int j = 0; j < paddle::framework::product(dims_i); ++j) { + std::cout << " " << output_ptr[j]; + } + std::cout << std::endl; + } + + delete engine; + return 0; +} diff --git a/paddle/inference/inference.cc b/paddle/inference/inference.cc new file mode 100644 index 0000000000000000000000000000000000000000..49e39358e81bbee64a618be88ee0fca6aa438b93 --- /dev/null +++ b/paddle/inference/inference.cc @@ -0,0 +1,195 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "inference.h" +#include +#include "paddle/framework/executor.h" +#include "paddle/framework/feed_fetch_method.h" +#include "paddle/framework/init.h" +#include "paddle/framework/scope.h" + +#ifdef PADDLE_USE_PTOOLS +#include "chooseser.h" +#endif + +namespace paddle { + +void InferenceEngine::LoadInferenceModel( + const std::string& dirname, + const std::vector& feed_var_names, + const std::vector& fetch_var_names) { +#ifdef PADDLE_USE_PTOOLS + std::string model_filename = dirname + "/__model__"; + LOG(INFO) << "Using PicklingTools, loading model from " << model_filename; + Val v; + LoadValFromFile(model_filename.c_str(), v, SERIALIZE_P0); + std::string program_desc_str = v["program_desc_str"]; + LOG(INFO) << "program_desc_str's size: " << program_desc_str.size(); +// PicklingTools cannot parse the vector of strings correctly. +#else + std::string model_filename = dirname + "/__model__.dat"; + LOG(INFO) << "loading model from " << model_filename; + std::ifstream inputfs(model_filename, std::ios::in | std::ios::binary); + std::string program_desc_str; + inputfs.seekg(0, std::ios::end); + program_desc_str.resize(inputfs.tellg()); + inputfs.seekg(0, std::ios::beg); + LOG(INFO) << "program_desc_str's size: " << program_desc_str.size(); + inputfs.read(&program_desc_str[0], program_desc_str.size()); + inputfs.close(); +#endif + program_ = new framework::ProgramDesc(program_desc_str); + GenerateLoadProgram(dirname); + + if (feed_var_names.empty() || fetch_var_names.empty()) { + LOG(FATAL) << "Please specify the feed_var_names and fetch_var_names."; + } + feed_var_names_ = feed_var_names; + fetch_var_names_ = fetch_var_names; + PrependFeedOp(); + AppendFetchOp(); +} + +bool InferenceEngine::IsParameter(const framework::VarDesc* var) { + if (var->Persistable()) { + // There are many unreachable variables in the program + for (size_t i = 0; i < program_->Size(); ++i) { + const framework::BlockDesc& block = program_->Block(i); + for (auto* op : block.AllOps()) { + for (auto input_argument_name : op->InputArgumentNames()) { + if (input_argument_name == var->Name()) { + return true; + } + } + } + } + } + return false; +} + +void InferenceEngine::GenerateLoadProgram(const std::string& dirname) { + framework::BlockDesc* global_block = program_->MutableBlock(0); + + load_program_ = new framework::ProgramDesc(); + framework::BlockDesc* load_block = load_program_->MutableBlock(0); + for (auto* var : global_block->AllVars()) { + if (IsParameter(var)) { + LOG(INFO) << "parameter's name: " << var->Name(); + + framework::VarDesc* new_var = load_block->Var(var->Name()); + new_var->SetShape(var->Shape()); + new_var->SetDataType(var->GetDataType()); + new_var->SetType(var->GetType()); + new_var->SetLoDLevel(var->GetLoDLevel()); + new_var->SetPersistable(true); + + // append_op + framework::OpDesc* op = load_block->AppendOp(); + op->SetType("load"); + op->SetOutput("Out", {new_var->Name()}); + op->SetAttr("file_path", {dirname + "/" + new_var->Name()}); + op->CheckAttrs(); + } + } +} + +void InferenceEngine::PrependFeedOp() { + if (!program_) { + LOG(FATAL) << "Please initialize the program_ first."; + } + + framework::BlockDesc* global_block = program_->MutableBlock(0); + + // create_var + framework::VarDesc* feed_var = global_block->Var("feed"); + feed_var->SetType(framework::proto::VarDesc::FEED_MINIBATCH); + feed_var->SetPersistable(true); + + // prepend feed_op + for (size_t i = 0; i < feed_var_names_.size(); ++i) { + std::string var_name = feed_var_names_[i]; + LOG(INFO) << "feed var's name: " << var_name; + + // prepend_op + framework::OpDesc* op = global_block->PrependOp(); + op->SetType("feed"); + op->SetInput("X", {"feed"}); + op->SetOutput("Out", {var_name}); + op->SetAttr("col", {static_cast(i)}); + op->CheckAttrs(); + } +} + +void InferenceEngine::AppendFetchOp() { + if (!program_) { + LOG(FATAL) << "Please initialize the program_ first."; + } + + framework::BlockDesc* global_block = program_->MutableBlock(0); + + // create_var + framework::VarDesc* fetch_var = global_block->Var("fetch"); + fetch_var->SetType(framework::proto::VarDesc::FETCH_LIST); + fetch_var->SetPersistable(true); + + // append fetch_op + for (size_t i = 0; i < fetch_var_names_.size(); ++i) { + std::string var_name = fetch_var_names_[i]; + LOG(INFO) << "fetch var's name: " << var_name; + + // append_op + framework::OpDesc* op = global_block->AppendOp(); + op->SetType("fetch"); + op->SetInput("X", {var_name}); + op->SetOutput("Out", {"fetch"}); + op->SetAttr("col", {static_cast(i)}); + op->CheckAttrs(); + } +} + +void InferenceEngine::Execute(const std::vector& feeds, + std::vector& fetchs) { + if (!program_ || !load_program_) { + LOG(FATAL) << "Please initialize the program_ and load_program_ first."; + } + + if (feeds.size() < feed_var_names_.size()) { + LOG(FATAL) << "Please feed " << feed_var_names_.size() << " input Tensors."; + } + + auto* place = new platform::CPUPlace(); + framework::InitDevices({"CPU"}); + framework::Executor* executor = new framework::Executor(*place); + framework::Scope* scope = new framework::Scope(); + + executor->Run(*load_program_, scope, 0, true, true); + + // set_feed_variable + for (size_t i = 0; i < feed_var_names_.size(); ++i) { + framework::SetFeedVariable(scope, feeds[i], "feed", i); + } + + executor->Run(*program_, scope, 0, true, true); + + // get_fetch_variable + fetchs.resize(fetch_var_names_.size()); + for (size_t i = 0; i < fetch_var_names_.size(); ++i) { + fetchs[i] = framework::GetFetchVariable(*scope, "fetch", i); + } + + delete place; + delete scope; + delete executor; +} +} // namespace paddle diff --git a/paddle/inference/inference.h b/paddle/inference/inference.h new file mode 100644 index 0000000000000000000000000000000000000000..a3f3ef4b440036a0b27353cc092eed1bbf96eeb3 --- /dev/null +++ b/paddle/inference/inference.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/framework/block_desc.h" +#include "paddle/framework/lod_tensor.h" +#include "paddle/framework/program_desc.h" + +namespace paddle { + +class InferenceEngine { +public: + InferenceEngine() : program_(nullptr), load_program_(nullptr) {} + ~InferenceEngine() { + delete program_; + delete load_program_; + } + + void LoadInferenceModel(const std::string& dirname, + const std::vector& feed_var_names, + const std::vector& fetch_var_names); + void Execute(const std::vector& feeds, + std::vector& fetchs); + +private: + bool IsParameter(const framework::VarDesc* var); + void GenerateLoadProgram(const std::string& dirname); + void PrependFeedOp(); + void AppendFetchOp(); + +private: + framework::ProgramDesc* program_; + framework::ProgramDesc* load_program_; + std::vector feed_var_names_; + std::vector fetch_var_names_; +}; + +} // namespace paddle diff --git a/paddle/memory/README.md b/paddle/memory/README.md index 6cb003c50bc7d142d65b0591e7e5235431d2ea42..7cf61d089b39041b7a15184e0ea9211d14a66f5e 100644 --- a/paddle/memory/README.md +++ b/paddle/memory/README.md @@ -12,13 +12,13 @@ p = memory::Alloc(platform::CPUPlace(), 4*1024); To allocate 4KB memory on the 3rd GPU: ```cpp -p = memory::Alloc(platform::GPUPlace(2), 4*1024); +p = memory::Alloc(platform::CUDAPlace(2), 4*1024); ``` To free memory and check the so-far used amount of memory on a place: ```cpp -auto pl = platform::GPUPlace(0); +auto pl = platform::CUDAPlace(0); p = memory::Alloc(pl, 4*1024); cout << memory::Used(pl); memory::Free(pl, p); @@ -36,7 +36,7 @@ template size_t Used(Place); } // namespace memory ``` -These function templates have specializations on either `platform::CPUPlace` or `platform::GPUPlace`: +These function templates have specializations on either `platform::CPUPlace` or `platform::CUDAPlace`: ```cpp template<> @@ -49,7 +49,7 @@ and ```cpp template<> -void Alloc(GPUPlace p, size_t size) { +void Alloc(CUDAPlace p, size_t size) { return GetGPUBuddyAllocator(p.id)->Alloc(size); } ``` @@ -122,7 +122,7 @@ There are two implementations of `Context`: 1. [`CPUContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L105), whose [`New` method](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L131) calls [`g_cpu_allocator.get()->New(size_t)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.cc#L15) to allocate the memory. -1. [`CUDAContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L99), which has a data member [`int gpu_id_`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L202). This looks very similar to class `majel::GPUPlace`, who also has an `int id_` data member. `CUDAContext::New(size_t)` calls [`g_cub_allocator->DeviceAllocate(&ptr, nbytes)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.cu#L355) to allocate the memory. +1. [`CUDAContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L99), which has a data member [`int gpu_id_`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L202). This looks very similar to class `majel::CUDAPlace`, who also has an `int id_` data member. `CUDAContext::New(size_t)` calls [`g_cub_allocator->DeviceAllocate(&ptr, nbytes)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.cu#L355) to allocate the memory. ### Majel diff --git a/paddle/memory/detail/buddy_allocator.cc b/paddle/memory/detail/buddy_allocator.cc index 64ee53803891f192302bb915027f0499dfa36411..2bc2c06a15702b29c8bdf755978aebe0e6219b4a 100644 --- a/paddle/memory/detail/buddy_allocator.cc +++ b/paddle/memory/detail/buddy_allocator.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/memory/detail/buddy_allocator.h" #include "glog/logging.h" diff --git a/paddle/memory/detail/buddy_allocator.h b/paddle/memory/detail/buddy_allocator.h index 9c41378483993101a098fc4ad1068c1ef908e566..4e0135dd655d04b7f99722a2159795738c1b29c7 100644 --- a/paddle/memory/detail/buddy_allocator.h +++ b/paddle/memory/detail/buddy_allocator.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/memory/detail/memory_block.cc b/paddle/memory/detail/memory_block.cc index fc40993208323f1f5d18103165c8835b5f829613..f50eceba096477d7b2f50f7c406770c8e9595332 100644 --- a/paddle/memory/detail/memory_block.cc +++ b/paddle/memory/detail/memory_block.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/memory/detail/memory_block.h" #include "paddle/memory/detail/meta_cache.h" diff --git a/paddle/memory/detail/memory_block.h b/paddle/memory/detail/memory_block.h index a5168b519f3a3747f34ef2ea7b87d72dce70064d..a4ca51b31b0df269b4b48f824bfd2b74f2a059fb 100644 --- a/paddle/memory/detail/memory_block.h +++ b/paddle/memory/detail/memory_block.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/memory/detail/meta_cache.cc b/paddle/memory/detail/meta_cache.cc index 7e2f92b00ca5d787c1114176c5dc3304ca3ebe26..2bacca75108f9f80e7aa291fcb4fd66112201394 100644 --- a/paddle/memory/detail/meta_cache.cc +++ b/paddle/memory/detail/meta_cache.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/memory/detail/meta_cache.h" #include "glog/logging.h" diff --git a/paddle/memory/detail/meta_cache.h b/paddle/memory/detail/meta_cache.h index cf5815644284c23a1d2abc904f8c5053ce107a72..db8ffd49ae30cf72ca691894af2df08a7106d02f 100644 --- a/paddle/memory/detail/meta_cache.h +++ b/paddle/memory/detail/meta_cache.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/memory/detail/meta_data.cc b/paddle/memory/detail/meta_data.cc index 70c5c1f439e84ec33cf0507beae33f9cdfa51727..dc57d4d2376ab1cfe6fb49c92af4591b3972a53a 100644 --- a/paddle/memory/detail/meta_data.cc +++ b/paddle/memory/detail/meta_data.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/memory/detail/meta_data.h" diff --git a/paddle/memory/detail/meta_data.h b/paddle/memory/detail/meta_data.h index 628cf1f2e347e288d1bf34c14c7b2f13a28d3662..6b83c42eb851f0487bed6d625d848cf90db00929 100644 --- a/paddle/memory/detail/meta_data.h +++ b/paddle/memory/detail/meta_data.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/memory/memcpy.cc b/paddle/memory/memcpy.cc index 5c629dc3d2aca2705e439df836214c1284b31c8f..b46141aafd7146bd3def12d86108c10f1f143d20 100644 --- a/paddle/memory/memcpy.cc +++ b/paddle/memory/memcpy.cc @@ -28,31 +28,25 @@ void Copy(platform::CPUPlace, void* dst, #ifdef PADDLE_WITH_CUDA template <> -void Copy(platform::CPUPlace dst_place, - void* dst, - platform::GPUPlace src_place, - const void* src, size_t num, - cudaStream_t stream) { +void Copy( + platform::CPUPlace dst_place, void* dst, platform::CUDAPlace src_place, + const void* src, size_t num, cudaStream_t stream) { platform::SetDeviceId(src_place.device); platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToHost, stream); } template <> -void Copy(platform::GPUPlace dst_place, - void* dst, - platform::CPUPlace src_place, - const void* src, size_t num, - cudaStream_t stream) { +void Copy( + platform::CUDAPlace dst_place, void* dst, platform::CPUPlace src_place, + const void* src, size_t num, cudaStream_t stream) { platform::SetDeviceId(dst_place.device); platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyHostToDevice, stream); } template <> -void Copy(platform::GPUPlace dst_place, - void* dst, - platform::GPUPlace src_place, - const void* src, size_t num, - cudaStream_t stream) { +void Copy( + platform::CUDAPlace dst_place, void* dst, platform::CUDAPlace src_place, + const void* src, size_t num, cudaStream_t stream) { if (dst_place == src_place) { platform::SetDeviceId(src_place.device); platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToDevice, stream); diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index 9cafdfda75d0511227ef648d50a8635320a81d32..c4bb6baee7ebf2941cee5915ca2723c298689261 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -83,12 +83,12 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { } template <> -size_t Used(platform::GPUPlace place) { +size_t Used(platform::CUDAPlace place) { return GetGPUBuddyAllocator(place.device)->Used(); } template <> -void* Alloc(platform::GPUPlace place, size_t size) { +void* Alloc(platform::CUDAPlace place, size_t size) { auto* buddy_allocator = GetGPUBuddyAllocator(place.device); auto* ptr = buddy_allocator->Alloc(size); if (ptr == nullptr) { @@ -101,14 +101,14 @@ void* Alloc(platform::GPUPlace place, size_t size) { LOG(WARNING) << "total " << total; LOG(WARNING) << "GpuMinChunkSize " << platform::GpuMinChunkSize(); LOG(WARNING) << "GpuMaxChunkSize " << platform::GpuMaxChunkSize(); - LOG(WARNING) << "GPU memory used: " << Used(place); + LOG(WARNING) << "GPU memory used: " << Used(place); platform::SetDeviceId(cur_dev); } return ptr; } template <> -void Free(platform::GPUPlace place, void* p) { +void Free(platform::CUDAPlace place, void* p) { GetGPUBuddyAllocator(place.device)->Free(p); } diff --git a/paddle/memory/memory_test.cc b/paddle/memory/memory_test.cc index 2444931e26774ae80b916fbb7bd46ff93025d9ed..f476bf71264da59a5c546968f4689145e1d8801b 100644 --- a/paddle/memory/memory_test.cc +++ b/paddle/memory/memory_test.cc @@ -82,7 +82,7 @@ TEST(BuddyAllocator, CPUMultAlloc) { #ifdef PADDLE_WITH_CUDA -size_t align(size_t size, paddle::platform::GPUPlace place) { +size_t align(size_t size, paddle::platform::CUDAPlace place) { size += sizeof(paddle::memory::detail::Metadata); size_t alignment = paddle::platform::GpuMinChunkSize(); size_t remaining = size % alignment; @@ -94,7 +94,7 @@ TEST(BuddyAllocator, GPUAllocation) { EXPECT_EQ(p, nullptr); - paddle::platform::GPUPlace gpu(0); + paddle::platform::CUDAPlace gpu(0); p = paddle::memory::Alloc(gpu, 4096); EXPECT_NE(p, nullptr); @@ -103,7 +103,7 @@ TEST(BuddyAllocator, GPUAllocation) { } TEST(BuddyAllocator, GPUMultAlloc) { - paddle::platform::GPUPlace gpu; + paddle::platform::CUDAPlace gpu; std::unordered_map ps; diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 5aaaf993323c2d4dbef688d0977ec6374fde6512..f1ce52332327ed2a9f290ccf412199fd5a6bbb67 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -1,5 +1,6 @@ file(GLOB GENERAL_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_op.cc") string(REPLACE ".cc" "" GENERAL_OPS "${GENERAL_OPS}") +set(DEPS_OPS "") set(pybind_file ${PADDLE_SOURCE_DIR}/paddle/pybind/pybind.h) file(WRITE ${pybind_file} "// Generated by the paddle/operator/CMakeLists.txt. DO NOT EDIT!\n\n") function(op_library TARGET) @@ -48,6 +49,10 @@ function(op_library TARGET) message(FATAL_ERROR "The op library ${TARGET} should contains at least one .cc file") endif() + list(LENGTH op_library_DEPS op_library_DEPS_len) + if (${op_library_DEPS_len} GREATER 0) + set(DEPS_OPS ${TARGET} ${DEPS_OPS} PARENT_SCOPE) + endif() if (WITH_GPU) nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS} ${op_common_deps}) @@ -56,106 +61,28 @@ function(op_library TARGET) ${op_common_deps}) endif() - # net_op doesn't need pybind - if ("${TARGET}" STREQUAL "net_op") - set(pybind_flag 1) - endif() - - if ("${TARGET}" STREQUAL "compare_op") - set(pybind_flag 1) - file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(equal);\n") - endif() - - # conv_op contains several operators - if ("${TARGET}" STREQUAL "conv_op") - set(pybind_flag 1) - # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_OP(conv2d);\n") - endif() - - # conv_cudnn_op contains several operators - if ("${TARGET}" STREQUAL "conv_cudnn_op") - set(pybind_flag 1) - # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_OP(conv2d_cudnn);\n") - endif() - - # pool_op contains several operators - if ("${TARGET}" STREQUAL "pool_op") - set(pybind_flag 1) - # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_OP(pool2d);\n") - endif() - - # pool_cudnn_op contains several operators - if ("${TARGET}" STREQUAL "pool_cudnn_op") - set(pybind_flag 1) - # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_OP(pool2d_cudnn);\n") - endif() - - if ("${TARGET}" STREQUAL "logical_op") - set(pybind_flag 1) - file(APPEND ${pybind_file} "USE_OP(logical_and);\n") - endif() - - # pool_with_index_op contains several operators - if ("${TARGET}" STREQUAL "pool_with_index_op") - set(pybind_flag 1) - # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_OP(max_pool2d_with_index);\n") - endif() - - # conv_transpose_op contains several operators - if ("${TARGET}" STREQUAL "conv_transpose_op") - set(pybind_flag 1) - # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_OP(conv2d_transpose);\n") - endif() - - # conv_transpose_cudnn_op contains two operators - if ("${TARGET}" STREQUAL "conv_transpose_cudnn_op") - set(pybind_flag 1) - # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_OP(conv2d_transpose_cudnn);\n") - endif() - - # save_restore_op contains several operators - if ("${TARGET}" STREQUAL "save_restore_op") - set(pybind_flag 1) - # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(save);\n") - endif() - - # activation_op contains several operators - if ("${TARGET}" STREQUAL "activation_op") - set(pybind_flag 1) - # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_OP(sigmoid);\n") - endif() - - # nccl_op contains several operators - if ("${TARGET}" STREQUAL "nccl_op") - set(pybind_flag 1) - # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(ncclAllReduce);\n") - endif() - - # reduce_op contains several operators - if ("${TARGET}" STREQUAL "reduce_op") - set(pybind_flag 1) - # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_OP(reduce_sum);\n") - endif() + # Define operators that don't need pybind here. + foreach(manual_pybind_op "net_op" "compare_op" "logical_op" "nccl_op" "tensor_array_read_write_op") + if ("${TARGET}" STREQUAL "${manual_pybind_op}") + set(pybind_flag 1) + endif() + endforeach() - if ("${TARGET}" STREQUAL "tensor_array_read_write_op") - set(pybind_flag 1) - file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(read_from_array);\nUSE_NO_KERNEL_OP(write_to_array);\n") + # The registration of USE_OP, please refer to paddle/framework/op_registry.h. + # Note that it's enough to just adding one operator to pybind in a *_op.cc file. + # And for detail pybind information, please see generated paddle/pybind/pybind.h. + file(READ ${TARGET}.cc TARGET_CONTENT) + string(REGEX MATCH "REGISTER_OP\\(.*REGISTER_OP\\(" multi_register "${TARGET_CONTENT}") + string(REGEX MATCH "REGISTER_OP\\([a-z0-9_]*," one_register "${multi_register}") + if (one_register STREQUAL "") + string(REPLACE "_op" "" TARGET "${TARGET}") + else () + string(REPLACE "REGISTER_OP(" "" TARGET "${one_register}") + string(REPLACE "," "" TARGET "${TARGET}") endif() # pybind USE_NO_KERNEL_OP # HACK: if REGISTER_OP_CPU_KERNEL presents the operator must have kernel - file(READ ${TARGET}.cc TARGET_CONTENT) string(REGEX MATCH "REGISTER_OP_CPU_KERNEL" regex_result "${TARGET_CONTENT}") string(REPLACE "_op" "" TARGET "${TARGET}") if (${pybind_flag} EQUAL 0 AND regex_result STREQUAL "") @@ -166,7 +93,6 @@ function(op_library TARGET) # pybind USE_CPU_ONLY_OP list(LENGTH cu_srcs cu_srcs_len) list(LENGTH cu_cc_srcs cu_cc_srcs_len) - if (${pybind_flag} EQUAL 0 AND ${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0) file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n") set(pybind_flag 1) @@ -181,58 +107,31 @@ endfunction() add_subdirectory(math) add_subdirectory(nccl) -set(DEPS_OPS - cond_op - cross_entropy_op - recurrent_op - softmax_with_cross_entropy_op - softmax_op - sequence_softmax_op - sum_op - pool_op - maxout_op - unpool_op - pool_with_index_op - conv_op - conv_transpose_op - nccl_op - sequence_conv_op - sequence_pool_op - lod_rank_table_op - lod_tensor_to_array_op - array_to_lod_tensor_op - max_sequence_len_op - lstm_op - tensor_array_read_write_op - gru_op - adagrad_op - sgd_op - save_op - load_op - send_op - recv_op) +if(WITH_GPU) + op_library(nccl_op DEPS nccl_common) + file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(ncclAllReduce);\n") +else() + set(DEPS_OPS ${DEPS_OPS} nccl_op) +endif() if(WITH_DISTRIBUTE) -add_subdirectory(detail) -op_library(send_op SRCS send_op.cc DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib_target protobuf) -set_source_files_properties( - send_op.cc - PROPERTIES - COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") - -op_library(recv_op SRCS recv_op.cc DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib_target protobuf) -set_source_files_properties( - recv_op.cc - PROPERTIES - COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") - -cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op sum_op executor) + add_subdirectory(detail) + set(DISTRIBUTE_DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib_target protobuf) + set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") + op_library(send_op DEPS ${DISTRIBUTE_DEPS}) + set_source_files_properties(send_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + op_library(recv_op DEPS ${DISTRIBUTE_DEPS}) + set_source_files_properties(recv_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op sum_op executor) +else() + set(DEPS_OPS ${DEPS_OPS} send_op recv_op) endif() -op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op) +op_library(cond_op DEPS framework_proto tensor net_op) op_library(cross_entropy_op DEPS cross_entropy) op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax) op_library(softmax_op DEPS softmax) +op_library(detection_output_op DEPS softmax) op_library(sequence_softmax_op DEPS softmax) op_library(sum_op DEPS selected_rows_functor) op_library(sgd_op DEPS selected_rows_functor) @@ -242,21 +141,18 @@ op_library(pool_op DEPS pooling) op_library(maxout_op DEPS maxouting) op_library(unpool_op DEPS unpooling) op_library(pool_with_index_op DEPS pooling) -op_library(lod_rank_table_op SRCS lod_rank_table_op.cc DEPS lod_rank_table) -op_library(lod_tensor_to_array_op SRCS lod_tensor_to_array_op.cc DEPS lod_rank_table_op) -op_library(array_to_lod_tensor_op SRCS array_to_lod_tensor_op.cc DEPS lod_rank_table_op) -op_library(max_sequence_len_op SRCS max_sequence_len_op.cc DEPS lod_rank_table) -op_library(tensor_array_read_write_op SRCS tensor_array_read_write_op.cc) -if(WITH_GPU) -op_library(nccl_op DEPS nccl_common) -endif() +op_library(lod_rank_table_op DEPS lod_rank_table) +op_library(lod_tensor_to_array_op DEPS lod_rank_table_op) +op_library(array_to_lod_tensor_op DEPS lod_rank_table_op) +op_library(max_sequence_len_op DEPS lod_rank_table) op_library(sequence_conv_op DEPS context_project) op_library(sequence_pool_op DEPS sequence_pooling) op_library(lstm_op DEPS sequence2batch lstm_compute) op_library(conv_transpose_op DEPS vol2col) op_library(gru_op DEPS sequence2batch gru_compute) -op_library(recurrent_op SRCS recurrent_op.cc DEPS executor) - +op_library(recurrent_op DEPS executor) +op_library(cos_sim_op DEPS cos_sim_functor) +op_library(parallel_do_op DEPS executor) # FIXME(typhoonzero): save/load depends lodtensor serialization functions op_library(save_op DEPS lod_tensor) op_library(load_op DEPS lod_tensor) @@ -265,9 +161,10 @@ list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) op_library(${src}) endforeach() +file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(logical_and);\nUSE_NO_KERNEL_OP(read_from_array);\n") -set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") +set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") cc_test(gather_test SRCS gather_test.cc DEPS tensor) @@ -276,6 +173,6 @@ cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor) cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory) if(WITH_GPU) - cc_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context) + cc_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context) endif() cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op) diff --git a/paddle/operators/accuracy_op.cc b/paddle/operators/accuracy_op.cc index b8ed93f4eb549fbd76bf360d4b843c1fa9635b40..d7baa6e90538484b400f32587a052d394a8d10d5 100644 --- a/paddle/operators/accuracy_op.cc +++ b/paddle/operators/accuracy_op.cc @@ -53,7 +53,7 @@ class AccuracyOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Out")->type()), diff --git a/paddle/operators/accuracy_op.cu b/paddle/operators/accuracy_op.cu index dd51aad105fecf4e3118f03e2f1868abb5523bc8..0aadd5af41531e54b357756441f92da668d4ec01 100644 --- a/paddle/operators/accuracy_op.cu +++ b/paddle/operators/accuracy_op.cu @@ -56,7 +56,7 @@ class AccuracyOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); auto* inference = ctx.Input("Out"); auto* indices = ctx.Input("Indices"); auto* label = ctx.Input("Label"); diff --git a/paddle/operators/activation_op.cc b/paddle/operators/activation_op.cc index 2b4c7e5f0de8347d4789136a3a45408ada439f02..4188858a90daf8b2c10eb6960393de977d467371 100644 --- a/paddle/operators/activation_op.cc +++ b/paddle/operators/activation_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/activation_op.h" @@ -22,8 +22,8 @@ class ActivationOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - ctx->SetOutputDim("Y", ctx->GetInputDim("X")); - ctx->ShareLoD("X", /*->*/ "Y"); + ctx->SetOutputDim("Out", ctx->GetInputDim("X")); + ctx->ShareLoD("X", /*->*/ "Out"); } }; @@ -32,7 +32,7 @@ class ActivationOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("Y")); + ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("Out")); } }; @@ -41,11 +41,11 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker { SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Sigmoid operator"); - AddOutput("Y", "Output of Sigmoid operator"); + AddOutput("Out", "Output of Sigmoid operator"); AddComment(R"DOC( Sigmoid Activation Operator -$$y = \frac{1}{1 + e^{-x}}$$ +$$out = \frac{1}{1 + e^{-x}}$$ )DOC"); } @@ -56,11 +56,11 @@ class LogSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { LogSigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of LogSigmoid operator"); - AddOutput("Y", "Output of LogSigmoid operator"); + AddOutput("Out", "Output of LogSigmoid operator"); AddComment(R"DOC( Logsigmoid Activation Operator -$$y = \log \frac{1}{1 + e^{-x}}$$ +$$out = \log \frac{1}{1 + e^{-x}}$$ )DOC"); } @@ -71,11 +71,11 @@ class ExpOpMaker : public framework::OpProtoAndCheckerMaker { ExpOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Exp operator"); - AddOutput("Y", "Output of Exp operator"); + AddOutput("Out", "Output of Exp operator"); AddComment(R"DOC( Exp Activation Operator. -$y = e^x$ +$out = e^x$ )DOC"); } @@ -86,11 +86,11 @@ class ReluOpMaker : public framework::OpProtoAndCheckerMaker { ReluOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Relu operator"); - AddOutput("Y", "Output of Relu operator"); + AddOutput("Out", "Output of Relu operator"); AddComment(R"DOC( Relu Activation Operator. -$y = \max(x, 0)$ +$out = \max(x, 0)$ )DOC"); } @@ -101,12 +101,12 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker { LeakyReluOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of LeakyRelu operator"); - AddOutput("Y", "Output of LeakyRelu operator"); + AddOutput("Out", "Output of LeakyRelu operator"); AddAttr("alpha", "The small negative slope").SetDefault(0.02f); AddComment(R"DOC( LeakyRelu Activation Operator. -$y = \max(x, \alpha * x)$ +$out = \max(x, \alpha * x)$ )DOC"); } @@ -117,13 +117,13 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker { SoftShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Softshrink operator"); - AddOutput("Y", "Output of Softshrink operator"); + AddOutput("Out", "Output of Softshrink operator"); AddAttr("lambda", "non-negative offset").SetDefault(0.5f); AddComment(R"DOC( Softshrink Activation Operator. $$ -y = \begin{cases} +out = \begin{cases} x - \lambda, \text{if } x > \lambda \\ x + \lambda, \text{if } x < -\lambda \\ 0, \text{otherwise} @@ -139,11 +139,11 @@ class TanhOpMaker : public framework::OpProtoAndCheckerMaker { TanhOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Tanh operator"); - AddOutput("Y", "Output of Tanh operator"); + AddOutput("Out", "Output of Tanh operator"); AddComment(R"DOC( Tanh Activation Operator. -$$y = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ +$$out = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ )DOC"); } @@ -154,11 +154,11 @@ class TanhShrinkOpMaker : public framework::OpProtoAndCheckerMaker { TanhShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of TanhShrink operator"); - AddOutput("Y", "Output of TanhShrink operator"); + AddOutput("Out", "Output of TanhShrink operator"); AddComment(R"DOC( TanhShrink Activation Operator. -$$y = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ +$$out = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ )DOC"); } @@ -169,14 +169,14 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker { HardShrinkOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of HardShrink operator"); - AddOutput("Y", "Output of HardShrink operator"); + AddOutput("Out", "Output of HardShrink operator"); AddAttr("threshold", "The value of threshold for HardShrink") .SetDefault(0.5f); AddComment(R"DOC( HardShrink Activation Operator. $$ -y = \begin{cases} +out = \begin{cases} x, \text{if } x > \lambda \\ x, \text{if } x < -\lambda \\ 0, \text{otherwise} @@ -192,11 +192,11 @@ class SqrtOpMaker : public framework::OpProtoAndCheckerMaker { SqrtOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Sqrt operator"); - AddOutput("Y", "Output of Sqrt operator"); + AddOutput("Out", "Output of Sqrt operator"); AddComment(R"DOC( Sqrt Activation Operator. -$y = \sqrt{x}$ +$out = \sqrt{x}$ )DOC"); } @@ -207,11 +207,11 @@ class AbsOpMaker : public framework::OpProtoAndCheckerMaker { AbsOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Abs operator"); - AddOutput("Y", "Output of Abs operator"); + AddOutput("Out", "Output of Abs operator"); AddComment(R"DOC( Abs Activation Operator. -$y = |x|$ +$out = |x|$ )DOC"); } @@ -222,11 +222,11 @@ class CeilOpMaker : public framework::OpProtoAndCheckerMaker { CeilOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Ceil operator"); - AddOutput("Y", "Output of Ceil operator"); + AddOutput("Out", "Output of Ceil operator"); AddComment(R"DOC( Ceil Activation Operator. -$y = ceil(x)$ +$out = ceil(x)$ )DOC"); } @@ -237,11 +237,11 @@ class FloorOpMaker : public framework::OpProtoAndCheckerMaker { FloorOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Floor operator"); - AddOutput("Y", "Output of Floor operator"); + AddOutput("Out", "Output of Floor operator"); AddComment(R"DOC( Floor Activation Operator. -$y = floor(x)$ +$out = floor(x)$ )DOC"); } @@ -252,11 +252,11 @@ class RoundOpMaker : public framework::OpProtoAndCheckerMaker { RoundOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Round operator"); - AddOutput("Y", "Output of Round operator"); + AddOutput("Out", "Output of Round operator"); AddComment(R"DOC( Round Activation Operator. -$y = [x]$ +$out = [x]$ )DOC"); } @@ -267,11 +267,11 @@ class ReciprocalOpMaker : public framework::OpProtoAndCheckerMaker { ReciprocalOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Reciprocal operator"); - AddOutput("Y", "Output of Reciprocal operator"); + AddOutput("Out", "Output of Reciprocal operator"); AddComment(R"DOC( Reciprocal Activation Operator. -$$y = \frac{1}{x}$$ +$$out = \frac{1}{x}$$ )DOC"); } @@ -282,11 +282,11 @@ class LogOpMaker : public framework::OpProtoAndCheckerMaker { LogOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Log operator"); - AddOutput("Y", "Output of Log operator"); + AddOutput("Out", "Output of Log operator"); AddComment(R"DOC( Log Activation Operator. -$y = \ln(x)$ +$out = \ln(x)$ Natural logarithm of x. @@ -299,11 +299,11 @@ class SquareOpMaker : public framework::OpProtoAndCheckerMaker { SquareOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Square operator"); - AddOutput("Y", "Output of Square operator"); + AddOutput("Out", "Output of Square operator"); AddComment(R"DOC( Square Activation Operator. -$y = x^2$ +$out = x^2$ )DOC"); } @@ -314,11 +314,11 @@ class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker { SoftplusOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Softplus operator"); - AddOutput("Y", "Output of Softplus operator"); + AddOutput("Out", "Output of Softplus operator"); AddComment(R"DOC( Softplus Activation Operator. -$y = \ln(1 + e^{x})$ +$out = \ln(1 + e^{x})$ )DOC"); } @@ -329,11 +329,11 @@ class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker { SoftsignOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Softsign operator"); - AddOutput("Y", "Output of Softsign operator"); + AddOutput("Out", "Output of Softsign operator"); AddComment(R"DOC( Softsign Activation Operator. -$$y = \frac{x}{1 + |x|}$$ +$$out = \frac{x}{1 + |x|}$$ )DOC"); } @@ -344,7 +344,7 @@ class BReluOpMaker : public framework::OpProtoAndCheckerMaker { BReluOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of BRelu operator"); - AddOutput("Y", "Output of BRelu operator"); + AddOutput("Out", "Output of BRelu operator"); AddAttr("t_min", "The min marginal value of BRelu") .SetDefault(static_cast(0)); AddAttr("t_max", "The max marginal value of BRelu") @@ -352,7 +352,7 @@ class BReluOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( BRelu Activation Operator. -$y = \max(\min(x, t_{min}), t_{max})$ +$out = \max(\min(x, t_{min}), t_{max})$ )DOC"); } @@ -363,13 +363,13 @@ class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker { SoftReluOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of SoftRelu operator"); - AddOutput("Y", "Output of SoftRelu operator"); + AddOutput("Out", "Output of SoftRelu operator"); AddAttr("threshold", "The threshold value of SoftRelu") .SetDefault(40.0f); AddComment(R"DOC( SoftRelu Activation Operator. -$y = \ln(1 + \exp(\max(\min(x, threshold), threshold))$ +$out = \ln(1 + \exp(\max(\min(x, threshold), threshold))$ )DOC"); } @@ -380,7 +380,7 @@ class ELUOpMaker : public framework::OpProtoAndCheckerMaker { ELUOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of ELU operator"); - AddOutput("Y", "Output of ELU operator"); + AddOutput("Out", "Output of ELU operator"); AddAttr("alpha", "The alpha value of ELU").SetDefault(1.0f); AddComment(R"DOC( ELU Activation Operator. @@ -388,7 +388,7 @@ ELU Activation Operator. Applies the following element-wise computation on the input according to https://arxiv.org/abs/1511.07289. -$y = \max(0, x) + \min(0, \alpha * (e^x - 1))$ +$out = \max(0, x) + \min(0, \alpha * (e^x - 1))$ )DOC"); } @@ -399,13 +399,13 @@ class Relu6OpMaker : public framework::OpProtoAndCheckerMaker { Relu6OpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Relu6 operator"); - AddOutput("Y", "Output of Relu6 operator"); + AddOutput("Out", "Output of Relu6 operator"); AddAttr("threshold", "The threshold value of Relu6") .SetDefault(6.0f); AddComment(R"DOC( Relu6 Activation Operator. -$y = \min(\max(0, x), 6)$ +$out = \min(\max(0, x), 6)$ )DOC"); } @@ -416,12 +416,12 @@ class PowOpMaker : public framework::OpProtoAndCheckerMaker { PowOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Pow operator"); - AddOutput("Y", "Output of Pow operator"); + AddOutput("Out", "Output of Pow operator"); AddAttr("factor", "The exponential factor of Pow").SetDefault(1.0f); AddComment(R"DOC( Pow Activation Operator. -$y = x^{factor}$ +$out = x^{factor}$ )DOC"); } @@ -432,7 +432,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker { STanhOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of STanh operator"); - AddOutput("Y", "Output of STanh operator"); + AddOutput("Out", "Output of STanh operator"); AddAttr("scale_a", "The scale parameter of a for the input") .SetDefault(2.0f / 3.0f); AddAttr("scale_b", "The scale parameter of b for the input") @@ -440,7 +440,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( STanh Activation Operator. -$$y = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$ +$$out = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$ )DOC"); } @@ -451,14 +451,14 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker { ThresholdedReluOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of ThresholdedRelu operator"); - AddOutput("Y", "Output of ThresholdedRelu operator"); + AddOutput("Out", "Output of ThresholdedRelu operator"); AddAttr("threshold", "The threshold location of activation") .SetDefault(1.0f); AddComment(R"DOC( ThresholdedRelu Activation Operator. $$ -y = \begin{cases} +out = \begin{cases} x, \text{if } x > threshold \\ 0, \text{otherwise} \end{cases} @@ -473,7 +473,7 @@ class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { HardSigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of HardSigmoid operator"); - AddOutput("Y", "Output of HardSigmoid operator"); + AddOutput("Out", "Output of HardSigmoid operator"); AddAttr("slope", "Slope for linear approximation of sigmoid") .SetDefault(0.2f); AddAttr("offset", "Offset for linear approximation of sigmoid") @@ -484,7 +484,7 @@ HardSigmoid Activation Operator. Segment-wise linear approximation of sigmoid(https://arxiv.org/abs/1603.00391), which is much faster than sigmoid. -$y = \max(0, \min(1, slope * x + shift))$ +$out = \max(0, \min(1, slope * x + shift))$ The slope should be positive. The offset can be either positive or negative. The default slope and shift are set according to the above reference. @@ -499,12 +499,12 @@ class SwishOpMaker : public framework::OpProtoAndCheckerMaker { SwishOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Swish operator"); - AddOutput("Y", "Output of Swish operator"); + AddOutput("Out", "Output of Swish operator"); AddAttr("beta", "Constant beta of swish operator").SetDefault(1.0f); AddComment(R"DOC( Swish Activation Operator. -$$y = \frac{x}{1 + e^{- \beta x}}$$ +$$out = \frac{x}{1 + e^{- \beta x}}$$ )DOC"); } diff --git a/paddle/operators/activation_op.cu b/paddle/operators/activation_op.cu index 856d3fc35dafe6b22c25c55dfda2dc4973072615..b9ccdf639cf4a9ea80d530e550c16089e50c44e0 100644 --- a/paddle/operators/activation_op.cu +++ b/paddle/operators/activation_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/activation_op.h" diff --git a/paddle/operators/activation_op.h b/paddle/operators/activation_op.h index 75eefca8b8c7ba8831a2f90c83718d00b83fba30..88c3d1c597a853abdee7753a5110be4a1726e905 100644 --- a/paddle/operators/activation_op.h +++ b/paddle/operators/activation_op.h @@ -1,20 +1,21 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/detail/safe_ref.h" namespace paddle { namespace operators { @@ -26,12 +27,16 @@ class ActivationKernel using T = typename Functor::ELEMENT_TYPE; void Compute(const framework::ExecutionContext& context) const override { - auto* X = context.Input("X"); - auto* Y = context.Output("Y"); - Y->mutable_data(context.GetPlace()); - - auto x = framework::EigenVector::Flatten(*X); - auto y = framework::EigenVector::Flatten(*Y); + auto& X = detail::Ref(context.Input("X"), + "Cannot get input tensor X, variable name = %s", + context.op().Input("X")); + + auto& Out = detail::Ref(context.Output("Out"), + "Cannot get output tensor Out, variable name = %s", + context.op().Output("Out")); + Out.mutable_data(context.GetPlace()); + auto x = framework::EigenVector::Flatten(X); + auto out = framework::EigenVector::Flatten(Out); auto* place = context.template device_context().eigen_device(); Functor functor; @@ -40,7 +45,7 @@ class ActivationKernel for (auto& attr : attrs) { *attr.second = context.Attr(attr.first); } - functor(*place, x, y); + functor(*place, x, out); } }; @@ -51,14 +56,15 @@ class ActivationGradKernel using T = typename Functor::ELEMENT_TYPE; void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); - auto* Y = context.Input("Y"); - auto* dY = context.Input(framework::GradVarName("Y")); + auto* Out = context.Input("Out"); + auto* dOut = + context.Input(framework::GradVarName("Out")); auto* dX = context.Output(framework::GradVarName("X")); dX->mutable_data(context.GetPlace()); - auto dy = framework::EigenVector::Flatten(*dY); + auto dout = framework::EigenVector::Flatten(*dOut); auto x = framework::EigenVector::Flatten(*X); - auto y = framework::EigenVector::Flatten(*Y); + auto out = framework::EigenVector::Flatten(*Out); auto dx = framework::EigenVector::Flatten(*dX); auto* place = context.template device_context().eigen_device(); @@ -67,7 +73,7 @@ class ActivationGradKernel for (auto& attr : attrs) { *attr.second = context.Attr(attr.first); } - functor(*place, x, y, dy, dx); + functor(*place, x, out, dout, dx); } }; @@ -83,17 +89,18 @@ struct BaseActivationFunctor { // sigmoid(x) = 1 / (1 + exp(-x)) template struct SigmoidFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = static_cast(1) / (static_cast(1) + (-x).exp()); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = static_cast(1) / (static_cast(1) + (-x).exp()); } }; template struct SigmoidGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * y * (static_cast(1) - y); + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * out * (static_cast(1) - out); } }; @@ -101,7 +108,7 @@ struct SigmoidGradFunctor : public BaseActivationFunctor { // For numerical stability, we can use the log-sum-exp trick: // https://hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/ // We can rewrite the above equation as: -// y = -log( exp(0) + exp(-x)) [since exp(0) = 1] +// out = -log( exp(0) + exp(-x)) [since exp(0) = 1] // = -log( exp(max(-x, 0) - max(-x, 0)) + exp(-x + max(-x, 0) - max(-x, 0))) // = -log( exp(max(-x, 0)) * exp(-max(-x, 0)) - exp(max(-x, 0)) * exp(-x - // max(-x, 0))) @@ -112,10 +119,10 @@ struct SigmoidGradFunctor : public BaseActivationFunctor { // + exp(-x - max(-x, 0)))) template struct LogSigmoidFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { + template + void operator()(Device d, X x, Out out) const { auto temp = (-x).cwiseMax(static_cast(0)); // temp = max(-x, 0) - y.device(d) = -temp - (((-temp).exp() + (-x - temp).exp()).log()); + out.device(d) = -temp - (((-temp).exp() + (-x - temp).exp()).log()); } }; @@ -124,62 +131,66 @@ struct LogSigmoidFunctor : public BaseActivationFunctor { // exp(-x - max(-x, 0))) template struct LogSigmoidGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto temp = (-x).cwiseMax(static_cast(0)); // temp = max(-x, 0) dx.device(d) = - dy * ((-x - temp).exp() / ((-temp).exp() + (-x - temp).exp())); + dout * ((-x - temp).exp() / ((-temp).exp() + (-x - temp).exp())); } }; // exp(x) = e^x template struct ExpFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.exp(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.exp(); } }; template struct ExpGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * y; + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * out; } }; // relu(x) = max(x, 0) template struct ReluFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.cwiseMax(static_cast(0)); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.cwiseMax(static_cast(0)); } }; template struct ReluGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * (x > static_cast(0)).template cast(); + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * (x > static_cast(0)).template cast(); } }; // tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x)) template struct TanhFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.tanh(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.tanh(); } }; template struct TanhGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * (static_cast(1) - y * y); + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * (static_cast(1) - out * out); } }; @@ -187,17 +198,18 @@ struct TanhGradFunctor : public BaseActivationFunctor { // where tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x)) template struct TanhShrinkFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x - x.tanh(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x - x.tanh(); } }; template struct TanhShrinkGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * (x.tanh() * x.tanh()); + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * (x.tanh() * x.tanh()); } }; @@ -210,11 +222,11 @@ struct HardShrinkFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"threshold", &threshold}}; } - template - void operator()(Device d, X x, Y y) const { + template + void operator()(Device d, X x, Out out) const { auto temp1 = (x < static_cast(threshold * -1)).template cast().eval(); auto temp2 = (x > static_cast(threshold)).template cast().eval(); - y.device(d) = x * (temp1 + temp2); + out.device(d) = x * (temp1 + temp2); } }; @@ -226,11 +238,12 @@ struct HardShrinkGradFunctor : public BaseActivationFunctor { return {{"threshold", &threshold}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto temp1 = (x < static_cast(threshold * -1)).template cast().eval(); auto temp2 = (x > static_cast(threshold)).template cast().eval(); - dx.device(d) = dy * (temp1 + temp2).template cast(); + dx.device(d) = dout * (temp1 + temp2).template cast(); } }; @@ -243,12 +256,12 @@ struct SoftShrinkFunctor : public BaseActivationFunctor { return {{"lambda", &lambda}}; } - template - void operator()(Device d, X x, Y y) const { + template + void operator()(Device d, X x, Out out) const { auto lambdaT = static_cast(lambda); auto temp1 = (x > lambdaT).template cast().eval(); auto temp2 = (x < -lambdaT).template cast().eval(); - y.device(d) = temp1 * (x - lambdaT) + temp2 * (x + lambdaT); + out.device(d) = temp1 * (x - lambdaT) + temp2 * (x + lambdaT); } }; @@ -258,46 +271,49 @@ struct SoftShrinkGradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"lambda", &lambda}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto lambdaT = static_cast(lambda); auto temp1 = (x > lambdaT).template cast().eval(); auto temp2 = (x < -lambdaT).template cast().eval(); - dx.device(d) = dy * (temp1 + temp2).template cast(); + dx.device(d) = dout * (temp1 + temp2).template cast(); } }; // sqrt(x) = x^(1/2) template struct SqrtFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.sqrt(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.sqrt(); } }; template struct SqrtGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - const Y y_conj = Eigen::numext::conj(y); - dx.device(d) = static_cast(0.5) * dy / y_conj; + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + const Out out_conj = Eigen::numext::conj(out); + dx.device(d) = static_cast(0.5) * dout / out_conj; } }; // ceil(x) = ceiling(x) template struct CeilFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.ceil(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.ceil(); } }; template struct ZeroGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { dx.device(d) = static_cast(0) / x; } }; @@ -305,86 +321,90 @@ struct ZeroGradFunctor : public BaseActivationFunctor { // floor(x) = flooring(x) template struct FloorFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.ceil(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.ceil(); } }; // round(x) = [x] template struct RoundFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.round(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.round(); } }; // abs(x) = |x| template struct AbsFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.abs(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.abs(); } }; template struct AbsGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * x.sign(); + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * x.sign(); } }; // reciprocal(x) = 1 / x template struct ReciprocalFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = static_cast(1) / x; + template + void operator()(Device d, X x, Out out) const { + out.device(d) = static_cast(1) / x; } }; template struct ReciprocalGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * static_cast(-1) * y * y; + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * static_cast(-1) * out * out; } }; // log(x) = natural logarithm of x template struct LogFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.log(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.log(); } }; template struct LogGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * (static_cast(1) / x); + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * (static_cast(1) / x); } }; // square(x) = x^2 template struct SquareFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.square(); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.square(); } }; template struct SquareGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * static_cast(2) * x; + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * static_cast(2) * x; } }; @@ -399,9 +419,9 @@ struct BReluFunctor : public BaseActivationFunctor { return {{"t_min", &t_min}, {"t_max", &t_max}}; } - template - void operator()(Device d, X x, Y y) const { - y.device(d) = + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.cwiseMax(static_cast(t_min)).cwiseMin(static_cast(t_max)); } }; @@ -413,9 +433,10 @@ struct BReluGradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"t_min", &t_min}, {"t_max", &t_max}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * ((x > static_cast(t_min)) * (x < static_cast(t_max))) .template cast(); } @@ -430,9 +451,9 @@ struct Relu6Functor : public BaseActivationFunctor { return {{"threshold", &threshold}}; } - template - void operator()(Device d, X x, Y y) const { - y.device(d) = + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.cwiseMax(static_cast(0)).cwiseMin(static_cast(threshold)); } }; @@ -443,9 +464,10 @@ struct Relu6GradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"threshold", &threshold}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * ((x > static_cast(0)) * (x < static_cast(threshold))) .template cast(); } @@ -458,10 +480,10 @@ struct Relu6GradFunctor : public BaseActivationFunctor { // Then: softplus(x) = max(x, 0) + log(exp(-max(x, 0)) + exp(x - max(x, 0))) template struct SoftplusFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) { + template + void operator()(Device d, X x, Out out) { auto temp = x.cwiseMax(static_cast(0)); // temp = max(x, 0) - y.device(d) = temp + (((-temp).exp() + (x - temp).exp()).log()); + out.device(d) = temp + (((-temp).exp() + (x - temp).exp()).log()); } }; @@ -471,19 +493,21 @@ struct SoftplusFunctor : public BaseActivationFunctor { // exp(x - max(x, 0))) template struct SoftplusGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) { auto temp = x.cwiseMax(static_cast(0)); // temp = max(x, 0) - dx.device(d) = dy * ((x - temp).exp() / ((-temp).exp() + (x - temp).exp())); + dx.device(d) = + dout * ((x - temp).exp() / ((-temp).exp() + (x - temp).exp())); } }; // softsign(x) = x / (1 + |x|) template struct SoftsignFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y) { - y.device(d) = x / (static_cast(1) + x.abs()); + template + void operator()(Device d, X x, Out out) { + out.device(d) = x / (static_cast(1) + x.abs()); } }; @@ -491,10 +515,11 @@ struct SoftsignFunctor : public BaseActivationFunctor { // Taken from https://en.wikipedia.org/wiki/Activation_function template struct SoftsignGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Y y, dY dy, dX dx) { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) { dx.device(d) = - dy * (static_cast(1) / (static_cast(1) + x.abs()).square()); + dout * (static_cast(1) / (static_cast(1) + x.abs()).square()); } }; @@ -505,11 +530,11 @@ struct SoftReluFunctor : public BaseActivationFunctor { return {{"threshold", &threshold}}; } - template - void operator()(Device d, X x, Y y) const { + template + void operator()(Device d, X x, Out out) const { auto tmp = static_cast(threshold); auto temp = x.cwiseMax(-tmp).cwiseMin(tmp); - y.device(d) = (static_cast(1) + temp.exp()).log(); + out.device(d) = (static_cast(1) + temp.exp()).log(); } }; @@ -519,11 +544,12 @@ struct SoftReluGradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"threshold", &threshold}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto tmp = static_cast(threshold); auto temp = ((x > -tmp) * (x < tmp)).template cast().eval(); - dx.device(d) = dy * (static_cast(1) - (-y).exp()) * temp; + dx.device(d) = dout * (static_cast(1) - (-out).exp()) * temp; } }; @@ -534,9 +560,9 @@ struct LeakyReluFunctor : public BaseActivationFunctor { return {{"alpha", &alpha}}; } - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.cwiseMax(static_cast(alpha) * x); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.cwiseMax(static_cast(alpha) * x); } }; @@ -546,12 +572,13 @@ struct LeakyReluGradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"alpha", &alpha}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto temp1 = static_cast(alpha) * (x < static_cast(0)).template cast().eval(); auto temp2 = (x >= static_cast(0)).template cast().eval(); - dx.device(d) = dy * (temp1 + temp2).template cast(); + dx.device(d) = dout * (temp1 + temp2).template cast(); } }; @@ -562,11 +589,11 @@ struct ELUFunctor : public BaseActivationFunctor { return {{"alpha", &alpha}}; } - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.cwiseMax(static_cast(0)) + - (static_cast(alpha) * (x.exp() - static_cast(1))) - .cwiseMin(static_cast(0)); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.cwiseMax(static_cast(0)) + + (static_cast(alpha) * (x.exp() - static_cast(1))) + .cwiseMin(static_cast(0)); } }; @@ -576,10 +603,11 @@ struct ELUGradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"alpha", &alpha}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * (x > static_cast(0)).template cast() + - dy * (y + static_cast(alpha)) * + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * (x > static_cast(0)).template cast() + + dout * (out + static_cast(alpha)) * (x < static_cast(0)).template cast(); } }; @@ -591,9 +619,9 @@ struct PowFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"factor", &factor}}; } - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x.pow(static_cast(factor)); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.pow(static_cast(factor)); } }; @@ -603,9 +631,10 @@ struct PowGradFunctor : public BaseActivationFunctor { typename BaseActivationFunctor::AttrPair GetAttrs() { return {{"factor", &factor}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = dy * static_cast(factor) * + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * static_cast(factor) * x.pow(static_cast(factor - static_cast(1))); } }; @@ -618,9 +647,9 @@ struct STanhFunctor : public BaseActivationFunctor { return {{"scale_a", &scale_a}, {"scale_b", &scale_b}}; } - template - void operator()(Device d, X x, Y y) const { - y.device(d) = + template + void operator()(Device d, X x, Out out) const { + out.device(d) = static_cast(scale_b) * (static_cast(scale_a) * x).tanh(); } }; @@ -633,12 +662,13 @@ struct STanhGradFunctor : public BaseActivationFunctor { return {{"scale_a", &scale_a}, {"scale_b", &scale_b}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto a = static_cast(scale_a); auto b = static_cast(scale_b); auto temp = (a * x).tanh() * (a * x).tanh(); - dx.device(d) = dy * a * b * (static_cast(1) - temp); + dx.device(d) = dout * a * b * (static_cast(1) - temp); } }; @@ -649,10 +679,10 @@ struct ThresholdedReluFunctor : public BaseActivationFunctor { return {{"threshold", &threshold}}; } - template - void operator()(Device d, X x, Y y) const { + template + void operator()(Device d, X x, Out out) const { auto th = static_cast(threshold); - y.device(d) = (x > th).template cast() * x; + out.device(d) = (x > th).template cast() * x; } }; @@ -663,10 +693,11 @@ struct ThresholdedReluGradFunctor : public BaseActivationFunctor { return {{"threshold", &threshold}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto th = static_cast(threshold); - dx.device(d) = dy * (x > th).template cast(); + dx.device(d) = dout * (x > th).template cast(); } }; @@ -678,10 +709,11 @@ struct HardSigmoidFunctor : public BaseActivationFunctor { return {{"slope", &slope}, {"offset", &offset}}; } - template - void operator()(Device d, X x, Y y) const { + template + void operator()(Device d, X x, Out out) const { auto temp = x * static_cast(slope) + static_cast(offset); - y.device(d) = temp.cwiseMax(static_cast(0)).cwiseMin(static_cast(1)); + out.device(d) = + temp.cwiseMax(static_cast(0)).cwiseMin(static_cast(1)); } }; @@ -693,12 +725,13 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor { return {{"slope", &slope}, {"offset", &offset}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { - dx.device(d) = - dy * - ((y > static_cast(0)) * (y < static_cast(1))).template cast() * - static_cast(slope); + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * + ((out > static_cast(0)) * (out < static_cast(1))) + .template cast() * + static_cast(slope); } }; @@ -709,9 +742,9 @@ struct SwishFunctor : public BaseActivationFunctor { return {{"beta", &beta}}; } - template - void operator()(Device d, X x, Y y) const { - y.device(d) = x / (static_cast(1) + (static_cast(-beta) * x).exp()); + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x / (static_cast(1) + (static_cast(-beta) * x).exp()); } }; @@ -722,12 +755,13 @@ struct SwishGradFunctor : public BaseActivationFunctor { return {{"beta", &beta}}; } - template - void operator()(Device d, X x, Y y, dY dy, dX dx) const { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { auto temp1 = static_cast(1) / (static_cast(1) + (static_cast(-beta) * x).exp()); - auto temp2 = temp1 * (static_cast(1) - (beta * y)); - dx.device(d) = dy * ((beta * y) + temp2); + auto temp2 = temp1 * (static_cast(1) - (beta * out)); + dx.device(d) = dout * ((beta * out) + temp2); } }; diff --git a/paddle/operators/adadelta_op.cu b/paddle/operators/adadelta_op.cu index eee2d0a2f55f877bc5c87c72bca07bfd9485e517..91294a0d5d148a43bb95ab83ae8176b475fde9de 100644 --- a/paddle/operators/adadelta_op.cu +++ b/paddle/operators/adadelta_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/adadelta_op.h" diff --git a/paddle/operators/adagrad_op.cc b/paddle/operators/adagrad_op.cc index 052c793a01907abdc7784d1290f43543ae81bdb1..c83318a272302a474c37ce86365201acf56b9cad 100644 --- a/paddle/operators/adagrad_op.cc +++ b/paddle/operators/adagrad_op.cc @@ -105,48 +105,18 @@ struct SparseAdagradFunctor { const framework::Tensor& learning_rate, T epsilon, framework::Tensor* moment, framework::Tensor* param) { // 1. g_m.rows = set(g.rows) - auto grad_rows = grad.rows(); - std::set row_set(grad_rows.begin(), grad_rows.end()); - std::vector merge_rows(row_set.begin(), row_set.end()); - auto grad_width = grad.value().dims()[1]; - std::unique_ptr grad_merge{ - new framework::SelectedRows()}; - grad_merge->set_rows(merge_rows); - grad_merge->set_height(grad.height()); - grad_merge->mutable_value()->mutable_data( - framework::make_ddim( - {static_cast(merge_rows.size()), grad_width}), - context.GetPlace()); - - math::SetConstant constant_functor; - constant_functor(context, grad_merge->mutable_value(), 0.0); - - auto* grad_merge_data = grad_merge->mutable_value()->data(); - auto* grad_data = grad.value().data(); - - for (size_t i = 0; i < grad_rows.size(); i++) { - size_t grad_merge_i = FindPos(merge_rows, grad_rows[i]); - for (int64_t j = 0; j < grad_width; j++) { - grad_merge_data[grad_merge_i * grad_width + j] += - grad_data[i * grad_width + j]; - } - } + math::scatter::MergeAdd merge_func; + auto grad_merge = merge_func(context, grad); + auto& merge_rows = grad_merge.rows(); + auto* grad_merge_data = grad_merge.mutable_value()->template data(); // 2. m += g_m * g_m - std::unique_ptr grad_square{ - new framework::SelectedRows()}; - grad_square->set_rows(grad_merge->rows()); - grad_square->set_height(grad_merge->height()); - grad_square->mutable_value()->mutable_data(grad_merge->value().dims(), - context.GetPlace()); - auto gs = - framework::EigenVector::Flatten(*(grad_square->mutable_value())); - auto gm = framework::EigenVector::Flatten(grad_merge->value()); - gs.device(*context.eigen_device()) = gm * gm; + math::scatter::Mul sqare_func; + auto grad_square = sqare_func(context, grad_merge, grad_merge); math::SelectedRowsAddToTensor functor; - functor(context, *grad_square, moment); + functor(context, grad_square, moment); // 3. update parameter auto* lr = learning_rate.data(); diff --git a/paddle/operators/adagrad_op.cu b/paddle/operators/adagrad_op.cu index 585b2d92894af65b8ed15a596f0377fdcf564cfa..4e579387924a5b0499f29609bc6b1322030a3c0d 100644 --- a/paddle/operators/adagrad_op.cu +++ b/paddle/operators/adagrad_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/adagrad_op.h" @@ -78,62 +78,30 @@ struct SparseAdagradFunctor { const framework::Tensor& learning_rate, T epsilon, framework::Tensor* moment, framework::Tensor* param) { // 1. g_m.rows = set(g.rows) - auto grad_rows = grad.rows(); - std::set row_set(grad_rows.begin(), grad_rows.end()); - std::vector merge_rows(row_set.begin(), row_set.end()); - auto grad_width = grad.value().dims()[1]; - std::unique_ptr grad_merge{ - new framework::SelectedRows()}; - grad_merge->set_rows(merge_rows); - grad_merge->set_height(grad.height()); - grad_merge->mutable_value()->mutable_data( - framework::make_ddim( - {static_cast(merge_rows.size()), grad_width}), - context.GetPlace()); - - math::SetConstant constant_functor; - constant_functor(context, grad_merge->mutable_value(), 0.0); - - auto* grad_merge_data = grad_merge->mutable_value()->data(); - auto* grad_data = grad.value().data(); - - const int block_size = 256; - dim3 threads(block_size, 1); - dim3 grid1(1, grad_rows.size()); - - MergeGradKernel< - T, 256><<(context) - .stream()>>>(grad_data, grad.rows().data(), - grad_merge_data, grad_merge->rows().data(), - grad_merge->rows().size(), grad_width); - + math::scatter::MergeAdd merge_func; + auto grad_merge = merge_func(context, grad); + auto* grad_merge_data = grad_merge.mutable_value()->template data(); + auto& merge_rows = grad_merge.rows(); // 2. m += g_m * g_m - std::unique_ptr grad_square{ - new framework::SelectedRows()}; - grad_square->set_rows(grad_merge->rows()); - grad_square->set_height(grad_merge->height()); - grad_square->mutable_value()->mutable_data(grad_merge->value().dims(), - context.GetPlace()); - auto gs = - framework::EigenVector::Flatten(*(grad_square->mutable_value())); - auto gm = framework::EigenVector::Flatten(grad_merge->value()); - gs.device(*context.eigen_device()) = gm * gm; + math::scatter::Mul sqare_func; + auto grad_square = sqare_func(context, grad_merge, grad_merge); math::SelectedRowsAddToTensor functor; - functor(context, *grad_square, moment); + functor(context, grad_square, moment); // 3. update parameter auto* lr = learning_rate.data(); auto* param_data = param->data(); auto* moment_data = moment->data(); + const int block_size = 256; + dim3 threads(block_size, 1); dim3 grid2(1, merge_rows.size()); SparseAdagradFunctorKernel< T, 256><<(context) - .stream()>>>(grad_merge_data, grad_merge->rows().data(), + .stream()>>>(grad_merge_data, grad_merge.rows().data(), lr, param_data, moment_data, grad_width, epsilon); } diff --git a/paddle/operators/adagrad_op.h b/paddle/operators/adagrad_op.h index 0d77dbcbacd4efb6c1900e57b5c4ea9e9b136771..66f5b0f449a4f11a3c734c98a6a97833763348a1 100644 --- a/paddle/operators/adagrad_op.h +++ b/paddle/operators/adagrad_op.h @@ -47,8 +47,7 @@ class AdagradOpKernel : public framework::OpKernel { *ctx.Input("Grad")); auto moment = framework::EigenVector::Flatten( *ctx.Input("Moment")); - auto lr = framework::EigenVector::Flatten( - *ctx.Input("LearningRate")); + auto* learning_rate = ctx.Input("LearningRate"); auto param_out = framework::EigenVector::Flatten(*param_out_tensor); auto moment_out = framework::EigenVector::Flatten(*moment_out_tensor); @@ -56,8 +55,16 @@ class AdagradOpKernel : public framework::OpKernel { moment_out.device(*place) = moment + grad * grad; Eigen::DSizes m_dsize(moment_out_tensor->numel()); - param_out.device(*place) = - param - lr.broadcast(m_dsize) * grad / (moment_out.sqrt() + epsilon); + if (platform::is_cpu_place(ctx.GetPlace())) { + auto* lr = learning_rate->data(); + param_out.device(*place) = + param - lr[0] * grad / (moment_out.sqrt() + epsilon); + } else { + auto lr = framework::EigenVector::Flatten(*learning_rate); + param_out.device(*place) = + param - + lr.broadcast(m_dsize) * grad / (moment_out.sqrt() + epsilon); + } } else if (grad_var->IsType()) { auto* param_tensor = ctx.Input("Param"); PADDLE_ENFORCE_EQ(param_tensor, param_out_tensor); diff --git a/paddle/operators/adam_op.cu b/paddle/operators/adam_op.cu index c135b3737899a1ae92041b4759698ddc30c20e12..94f840c188942a900858429bc621c3a18d5900ad 100644 --- a/paddle/operators/adam_op.cu +++ b/paddle/operators/adam_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/adam_op.h" diff --git a/paddle/operators/adam_op.h b/paddle/operators/adam_op.h index 45157842a6f92348909498f83d304d53b36c7d47..9cc34bdded780e61e8700eb4fa4a295c84fb48bc 100644 --- a/paddle/operators/adam_op.h +++ b/paddle/operators/adam_op.h @@ -13,59 +13,210 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/framework/eigen.h" +#include // for sqrt in CPU and CUDA #include "paddle/framework/op_registry.h" +#include "paddle/operators/detail/safe_ref.h" +#include "paddle/operators/math/selected_rows_functor.h" +#include "paddle/platform/for_range.h" namespace paddle { namespace operators { +namespace scatter = paddle::operators::math::scatter; + +template +struct AdamFunctor { + T beta1_; + T beta2_; + T epsilon_; + + const T* beta1_pow_; + const T* beta2_pow_; + const T* moment1_; + T* moment1_out_; + const T* moment2_; + T* moment2_out_; + const T* lr_; + const T* grad_; + const T* param_; + T* param_out_; + + AdamFunctor(T beta1, T beta2, T epsilon, const T* beta1_pow, + const T* beta2_pow, const T* mom1, T* mom1_out, const T* mom2, + T* mom2_out, const T* lr, const T* grad, const T* param, + T* param_out) + : beta1_(beta1), + beta2_(beta2), + epsilon_(epsilon), + beta1_pow_(beta1_pow), + beta2_pow_(beta2_pow), + moment1_(mom1), + moment1_out_(mom1_out), + moment2_(mom2), + moment2_out_(mom2_out), + lr_(lr), + grad_(grad), + param_(param), + param_out_(param_out) {} + + inline HOSTDEVICE void operator()(size_t i) const { + // Merge all memory access together. + T g = grad_[i]; + T mom1 = moment1_[i]; + T mom2 = moment2_[i]; + T lr = *lr_; + T beta1_pow = *beta1_pow_; + T beta2_pow = *beta2_pow_; + T p = param_[i]; + + // Calculation + lr *= sqrt(1 - beta2_pow) / (1 - beta1_pow); + mom1 = beta1_ * mom1 + (1 - beta1_) * g; + mom2 = beta2_ * mom2 + (1 - beta2_) * g * g; + p -= lr * (mom1 / (sqrt(mom2) + epsilon_)); + + // Write back to global memory + moment1_out_[i] = mom1; + moment2_out_[i] = mom2; + param_out_[i] = p; + } +}; + +template +struct SparseAdamFunctor { + T beta1_; + T beta2_; + T epsilon_; + + const T* beta1_pow_; + const T* beta2_pow_; + const T* moment1_; + T* moment1_out_; + const T* moment2_; + T* moment2_out_; + const T* lr_; + const T* grad_; + const T* param_; + T* param_out_; + + const int64_t* rows_; + int64_t row_numel_; + + SparseAdamFunctor(T beta1, T beta2, T epsilon, const T* beta1_pow, + const T* beta2_pow, const T* mom1, T* mom1_out, + const T* mom2, T* mom2_out, const T* lr, const T* grad, + const T* param, T* param_out, const int64_t* rows, + int64_t row_numel) + : beta1_(beta1), + beta2_(beta2), + epsilon_(epsilon), + beta1_pow_(beta1_pow), + beta2_pow_(beta2_pow), + moment1_(mom1), + moment1_out_(mom1_out), + moment2_(mom2), + moment2_out_(mom2_out), + lr_(lr), + grad_(grad), + param_(param), + param_out_(param_out), + rows_(rows), + row_numel_(row_numel) {} + + inline HOSTDEVICE void operator()(size_t i) const { + T beta1_pow = *beta1_pow_; + T beta2_pow = *beta2_pow_; + for (int64_t j = 0; j < row_numel_; ++j) { + T g = grad_[i * row_numel_ + j]; + T mom1 = moment1_[rows_[i] * row_numel_ + j]; + T mom2 = moment2_[rows_[i] * row_numel_ + j]; + T lr = *lr_; + T p = param_[rows_[i] * row_numel_ + j]; + + lr *= sqrt(1 - beta2_pow) / (1 - beta1_pow); + mom1 = beta1_ * mom1 + (1 - beta1_) * g; + mom2 = beta2_ * mom2 + (1 - beta2_) * g * g; + p -= lr * (mom1 / (sqrt(mom2) + epsilon_)); + + moment1_out_[rows_[i] * row_numel_ + j] = mom1; + moment2_out_[rows_[i] * row_numel_ + j] = mom2; + param_out_[rows_[i] * row_numel_ + j] = p; + } // for col id + } +}; + template class AdamOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto param_out_tensor = ctx.Output("ParamOut"); - auto moment1_out_tensor = ctx.Output("Moment1Out"); - auto moment2_out_tensor = ctx.Output("Moment2Out"); - - param_out_tensor->mutable_data(ctx.GetPlace()); - moment1_out_tensor->mutable_data(ctx.GetPlace()); - moment2_out_tensor->mutable_data(ctx.GetPlace()); + using paddle::framework::LoDTensor; + using paddle::operators::detail::Ref; T beta1 = static_cast(ctx.Attr("beta1")); T beta2 = static_cast(ctx.Attr("beta2")); T epsilon = static_cast(ctx.Attr("epsilon")); + auto& param = Ref(ctx.Input("Param"), "Must set Param"); + // auto& grad = Ref(ctx.Input("Grad"), "Must set Grad"); + auto* grad_var = ctx.InputVar("Grad"); + auto& mom1 = Ref(ctx.Input("Moment1"), "Must set Moment1"); + auto& mom2 = Ref(ctx.Input("Moment2"), "Must set Moment2"); + auto& lr = + Ref(ctx.Input("LearningRate"), "Must set LearningRate"); + + auto& beta1_pow = + Ref(ctx.Input("Beta1Pow"), "Must set Beta1Pow"); + auto& beta2_pow = + Ref(ctx.Input("Beta2Pow"), "Must set Beta2Pow"); + + auto& param_out = + Ref(ctx.Output("ParamOut"), "Must set ParamOut"); + auto& mom1_out = + Ref(ctx.Output("Moment1Out"), "Must set Moment1Out"); + auto& mom2_out = + Ref(ctx.Output("Moment2Out"), "Must set Moment1Out"); + + if (grad_var->IsType()) { + auto& grad = Ref(ctx.Input("Grad"), "Must set Grad"); + AdamFunctor functor( + beta1, beta2, epsilon, beta1_pow.template data(), + beta2_pow.template data(), mom1.template data(), + mom1_out.template mutable_data(ctx.GetPlace()), + mom2.template data(), + mom2_out.template mutable_data(ctx.GetPlace()), + lr.template data(), grad.template data(), + param.template data(), + param_out.template mutable_data(ctx.GetPlace())); + platform::ForRange for_range( + static_cast(ctx.device_context()), + param.numel()); + for_range(functor); + } else if (grad_var->IsType()) { + auto& grad = + Ref(ctx.Input("Grad"), "Must set Grad"); + // merge duplicated rows if any. + scatter::MergeAdd merge_func; + auto grad_merge = + merge_func(ctx.template device_context(), grad); + auto& grad_tensor = grad_merge.value(); + const T* grad_data = grad_tensor.template data(); + auto* rows = grad_merge.rows().data(); + auto row_numel = grad_tensor.numel() / grad_merge.rows().size(); - auto param = framework::EigenVector::Flatten( - *ctx.Input("Param")); - auto grad = framework::EigenVector::Flatten( - *ctx.Input("Grad")); - auto moment1 = framework::EigenVector::Flatten( - *ctx.Input("Moment1")); - auto moment2 = framework::EigenVector::Flatten( - *ctx.Input("Moment2")); - auto lr = framework::EigenVector::Flatten( - *ctx.Input("LearningRate")); - auto beta1_pow = framework::EigenVector::Flatten( - *ctx.Input("Beta1Pow")); - auto beta2_pow = framework::EigenVector::Flatten( - *ctx.Input("Beta2Pow")); - auto param_out = framework::EigenVector::Flatten(*param_out_tensor); - auto moment1_out = framework::EigenVector::Flatten(*moment1_out_tensor); - auto moment2_out = framework::EigenVector::Flatten(*moment2_out_tensor); - auto* place = ctx.template device_context().eigen_device(); - - moment1_out.device(*place) = beta1 * moment1 + (1 - beta1) * grad; - moment2_out.device(*place) = beta2 * moment2 + (1 - beta2) * grad.square(); - - // All of these are tensors of 1 element - auto lr_t = lr * (1 - beta2_pow).sqrt() / (1 - beta1_pow); - // Eigen does not support automatic broadcast - // Get dimensions of moment vector to broadcast lr_t - Eigen::DSizes m_dsize(moment1_out_tensor->numel()); - param_out.device(*place) = - param - - lr_t.broadcast(m_dsize) * - (moment1_out / (moment2_out.sqrt() + epsilon)); + SparseAdamFunctor functor( + beta1, beta2, epsilon, beta1_pow.template data(), + beta2_pow.template data(), mom1.template data(), + mom1_out.template mutable_data(ctx.GetPlace()), + mom2.template data(), + mom2_out.template mutable_data(ctx.GetPlace()), + lr.template data(), grad_data, param.template data(), + param_out.template mutable_data(ctx.GetPlace()), rows, row_numel); + platform::ForRange for_range( + static_cast(ctx.device_context()), + grad_merge.rows().size()); + for_range(functor); + } else { + PADDLE_THROW("Variable type not supported by adam_op"); + } } }; diff --git a/paddle/operators/adamax_op.cu b/paddle/operators/adamax_op.cu index 2d143905c4819dbf5f94391bdcf093971849e7a3..8f87bb28671018a184f25a014f9bdb7615f3040c 100644 --- a/paddle/operators/adamax_op.cu +++ b/paddle/operators/adamax_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/adamax_op.h" diff --git a/paddle/operators/array_operator.h b/paddle/operators/array_operator.h index 1f2b4fdb4b4a99d5baf5de1cc226dc196ab4eb2e..e0eef5d9f93d70930ee82d663de9610cc0176e33 100644 --- a/paddle/operators/array_operator.h +++ b/paddle/operators/array_operator.h @@ -1,20 +1,21 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/op_registry.h" +#include "paddle/platform/device_context.h" namespace paddle { namespace operators { @@ -27,11 +28,16 @@ class ArrayOp : public framework::OperatorBase { protected: size_t GetOffset(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const { + const platform::Place &place) const { auto *i = scope.FindVar(Input("I")); PADDLE_ENFORCE(i != nullptr, "I must be set"); auto &i_tensor = i->Get(); PADDLE_ENFORCE_EQ(i_tensor.numel(), 1); + + // get device context from pool + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + size_t offset; if (platform::is_gpu_place(i_tensor.place())) { // FIXME: Avoid copy from GPU to CPU diff --git a/paddle/operators/array_to_lod_tensor_op.cc b/paddle/operators/array_to_lod_tensor_op.cc index b6ca3cad94425207629160a4c7d715f685b23a09..49366fee8df5a44a97b7b4e87cbf0b7c813a414a 100644 --- a/paddle/operators/array_to_lod_tensor_op.cc +++ b/paddle/operators/array_to_lod_tensor_op.cc @@ -1,21 +1,23 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include + #include "paddle/framework/lod_rank_table.h" #include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/op_registry.h" #include "paddle/memory/memcpy.h" +#include "paddle/platform/device_context.h" namespace paddle { namespace operators { @@ -30,7 +32,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &dev_place) const override { auto &x = scope.FindVar(Input("X"))->Get(); auto &rank_table = scope.FindVar(Input("RankTable"))->Get(); @@ -103,6 +105,11 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { continue; } auto slice = out->Slice(out_offset, out_offset + len); + + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + framework::CopyFrom(x[x_idx].Slice(start_offset, end_offset), place, dev_ctx, &slice); out_offset += len; diff --git a/paddle/operators/assign_op.cc b/paddle/operators/assign_op.cc index a914ff4ba92318c75326bd7945bb73bcb93b6fc3..7d77be3be1034bb38f6c92c181aa525214073eec 100644 --- a/paddle/operators/assign_op.cc +++ b/paddle/operators/assign_op.cc @@ -1,20 +1,21 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/data_type.h" #include "paddle/framework/op_registry.h" #include "paddle/framework/var_type.h" +#include "paddle/platform/device_context.h" namespace paddle { namespace operators { @@ -71,7 +72,7 @@ class AssignOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { auto *x = scope.FindVar(Input("X")); if (x == nullptr) { return; @@ -80,6 +81,10 @@ class AssignOp : public framework::OperatorBase { PADDLE_ENFORCE( out != nullptr, "The Output(Out) should not be null if the Input(X) is set."); + + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + framework::VisitVarType(*x, AssignFunctor(out, dev_ctx)); } }; diff --git a/paddle/operators/auc_op.cc b/paddle/operators/auc_op.cc index 811c487089fcf4044f129ad6bf95b46535d4fcd6..c16bc11931e6733d567107913521eafc34a30066 100644 --- a/paddle/operators/auc_op.cc +++ b/paddle/operators/auc_op.cc @@ -39,7 +39,7 @@ class AucOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Out")->type()), diff --git a/paddle/operators/batch_norm_op.cc b/paddle/operators/batch_norm_op.cc index 1c14acbe11fbad9654bd0309f5674176ebdb5e6f..dd7b038b00813b192177c05dc06aa165a60b5156 100644 --- a/paddle/operators/batch_norm_op.cc +++ b/paddle/operators/batch_norm_op.cc @@ -50,10 +50,6 @@ class BatchNormOp : public framework::OperatorWithKernel { PADDLE_ENFORCE(ctx->HasOutput("SavedMean"), ""); PADDLE_ENFORCE(ctx->HasOutput("SavedVariance"), ""); - const float epsilon = ctx->Attrs().Get("epsilon"); - PADDLE_ENFORCE_GE(epsilon, 0.0, "epsilon should be larger than 0"); - PADDLE_ENFORCE_LE(epsilon, 0.001, "epsilon should not be too large"); - // make sure Mean/MeanOut and Variance/VarianceOut share memory in Python PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0], "Mean and MeanOut should share the same memory"); @@ -68,7 +64,7 @@ class BatchNormOp : public framework::OperatorWithKernel { PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5, "Input X must have 2 to 5 dimensions."); - const int C = + const int64_t C = (data_layout == DataLayout::kNCHW ? x_dims[1] : x_dims[x_dims.size() - 1]); @@ -82,6 +78,7 @@ class BatchNormOp : public framework::OperatorWithKernel { ctx->SetOutputDim("VarianceOut", {C}); ctx->SetOutputDim("SavedMean", {C}); ctx->SetOutputDim("SavedVariance", {C}); + ctx->ShareLoD("X", "Y"); } }; @@ -91,7 +88,12 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddAttr("is_test", "").SetDefault(false); AddAttr("momentum", "").SetDefault(0.9); - AddAttr("epsilon", "").SetDefault(1e-5); + AddAttr("epsilon", "") + .SetDefault(1e-5) + .AddCustomChecker([](const float &epsilon) { + PADDLE_ENFORCE(epsilon >= 0.0f && epsilon <= 0.001f, + "'epsilon' should be between 0.0 and 0.001."); + }); AddAttr("data_layout", "").SetDefault("NCHW"); AddInput("X", "The input tensor"); AddInput("Scale", @@ -304,7 +306,7 @@ class BatchNormGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext &ctx) const override { const auto *var = ctx.InputVar(framework::GradVarName("Y")); if (var == nullptr) { diff --git a/paddle/operators/batch_norm_op.cu.cc b/paddle/operators/batch_norm_op.cu.cc index 55d0736a4c8e09eea637e5ab7e49af9a618e7fd8..3d17725ab47682355b2093782848849857f9bf59 100644 --- a/paddle/operators/batch_norm_op.cu.cc +++ b/paddle/operators/batch_norm_op.cu.cc @@ -53,7 +53,7 @@ class BatchNormKernel public: void Compute(const framework::ExecutionContext &ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); double epsilon = static_cast(ctx.Attr("epsilon")); const float momentum = ctx.Attr("momentum"); const bool is_test = ctx.Attr("is_test"); @@ -179,7 +179,7 @@ class BatchNormGradKernel public: void Compute(const framework::ExecutionContext &ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); double epsilon = static_cast(ctx.Attr("epsilon")); const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = diff --git a/paddle/operators/beam_search_decode_op.cc b/paddle/operators/beam_search_decode_op.cc index 32756faac5324cfb3b5366857d2c8176665fb3ec..72e05607b0b612807d552b4c45b58f9d9ce9c2af 100644 --- a/paddle/operators/beam_search_decode_op.cc +++ b/paddle/operators/beam_search_decode_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/operators/beam_search_decode_op.h" +#include "paddle/platform/device_context.h" namespace paddle { namespace operators { @@ -55,7 +56,10 @@ class BeamSearchDecodeOp : public framework::OperatorBase { const framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const override { + const platform::Place& dev_place) const override { + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto& dev_ctx = *pool.Get(dev_place); + framework::ExecutionContext ctx(*this, scope, dev_ctx); const LoDTensorArray* ids = ctx.Input("Ids"); diff --git a/paddle/operators/beam_search_op.cc b/paddle/operators/beam_search_op.cc index 69ddc52035ae78dd2d1926b66fcbbe36737e87aa..2e0513b37a24b9737532b3a71f8f0724fbdd2c13 100644 --- a/paddle/operators/beam_search_op.cc +++ b/paddle/operators/beam_search_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/beam_search_op.h" diff --git a/paddle/operators/beam_search_op.h b/paddle/operators/beam_search_op.h index cc556bfe42ab12d73c0eb503d033efc272b5dd68..08b551ef9bd63106ed222d3a956a912294f827ec 100644 --- a/paddle/operators/beam_search_op.h +++ b/paddle/operators/beam_search_op.h @@ -189,7 +189,7 @@ class BeamSearchOp : public framework::OperatorBase { } void Run(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const override { + const platform::Place& dev_place) const override { LOG(INFO) << "run beam search op"; auto ids_var = scope.FindVar(Input("ids")); auto scores_var = scope.FindVar(Input("scores")); diff --git a/paddle/operators/cast_op.cc b/paddle/operators/cast_op.cc index fc6da064904610f5c9c140a6328858d697dd954e..446976edafca56f3c56fe573c8b5ef76a333089f 100644 --- a/paddle/operators/cast_op.cc +++ b/paddle/operators/cast_op.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/cast_op.h" #include "paddle/framework/op_registry.h" diff --git a/paddle/operators/cast_op.cu b/paddle/operators/cast_op.cu index 91e6fb391c637cc0d70a401d8d834451059ef6df..d68bbe6e39a2fbaa92787731145ae324288b981a 100644 --- a/paddle/operators/cast_op.cu +++ b/paddle/operators/cast_op.cu @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/cast_op.h" diff --git a/paddle/operators/cast_op.h b/paddle/operators/cast_op.h index 0c72d809e67e8f3f25be5643041d89da3d04d95e..9f39d91edd49d236d74019ca81b42002e4f35d36 100644 --- a/paddle/operators/cast_op.h +++ b/paddle/operators/cast_op.h @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/chunk_eval_op.cc b/paddle/operators/chunk_eval_op.cc index f1f274a7af079d68c7c1bcd8ec07962e18b0ea60..a040404266c3cd44230b141cfed1aaede3f05187 100644 --- a/paddle/operators/chunk_eval_op.cc +++ b/paddle/operators/chunk_eval_op.cc @@ -55,7 +55,7 @@ class ChunkEvalOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType(framework::proto::DataType::FP32, ctx.device_context()); diff --git a/paddle/operators/clip_by_norm_op.cc b/paddle/operators/clip_by_norm_op.cc index 05c79d0e25deea84463f0b67ac4dc9a8dd43f2cb..b90921d79baa920f0b6f92cde2f7e1ca9183d0d2 100644 --- a/paddle/operators/clip_by_norm_op.cc +++ b/paddle/operators/clip_by_norm_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/clip_by_norm_op.h" diff --git a/paddle/operators/clip_by_norm_op.cu b/paddle/operators/clip_by_norm_op.cu index acd75438230715420470b81f7a5e5953bd8b8abe..cbf8fa44133739f948fed13e18fc5cbaabd3abb0 100644 --- a/paddle/operators/clip_by_norm_op.cu +++ b/paddle/operators/clip_by_norm_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/clip_by_norm_op.h" diff --git a/paddle/operators/clip_by_norm_op.h b/paddle/operators/clip_by_norm_op.h index d8db1566b0e8c9c351d3b6d6aca1d22d991fe76e..87956a707cf58afa2336602b8ab6acf73b0ff814 100644 --- a/paddle/operators/clip_by_norm_op.h +++ b/paddle/operators/clip_by_norm_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/clip_op.cc b/paddle/operators/clip_op.cc index e34ba0a8f4757e45db58270dfd6191157f6e226a..573bb9c7dfdac2366c2458dd9f27a035a9f9b813 100644 --- a/paddle/operators/clip_op.cc +++ b/paddle/operators/clip_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/clip_op.h" diff --git a/paddle/operators/clip_op.cu b/paddle/operators/clip_op.cu index bb7dcc671a46758a6bd09e8035cf8d3f5e464b3b..5ccbc9643407c65e8734711744ceac9814f4c6a2 100644 --- a/paddle/operators/clip_op.cu +++ b/paddle/operators/clip_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/clip_op.h" diff --git a/paddle/operators/clip_op.h b/paddle/operators/clip_op.h index 0c40797410950641d3d509a4980d5c4bdbd75cff..51db185dffd80cc3b839d063acaf3f936d732817 100644 --- a/paddle/operators/clip_op.h +++ b/paddle/operators/clip_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/compare_op.cc b/paddle/operators/compare_op.cc index 1148172f3a2cc9b3f849ee04cefc19f16742d3eb..44665b7872acab8178552e5504916408cf566d13 100644 --- a/paddle/operators/compare_op.cc +++ b/paddle/operators/compare_op.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/compare_op.h" #include "paddle/framework/op_registry.h" @@ -66,9 +66,9 @@ class CompareOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext &ctx) const override { - framework::OpKernelType kt = OperatorWithKernel::GetKernelType(ctx); + framework::OpKernelType kt = OperatorWithKernel::GetActualKernelType(ctx); // CompareOp kernel's device type is decided by input tensor place kt.place_ = ctx.Input("X")->place(); return kt; diff --git a/paddle/operators/compare_op.cu b/paddle/operators/compare_op.cu index 596a878bcf9f5b81c87c3bd419a2f46c0a450635..26049271befd1fe57001659d1a406e73de0004a7 100644 --- a/paddle/operators/compare_op.cu +++ b/paddle/operators/compare_op.cu @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/compare_op.h" diff --git a/paddle/operators/compare_op.h b/paddle/operators/compare_op.h index a56536e155531ac9ea3d17256210bdb9f4212181..567e89c0a727ad0cdd2add8ec8b2a42c86a58007 100644 --- a/paddle/operators/compare_op.h +++ b/paddle/operators/compare_op.h @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include diff --git a/paddle/operators/cond_op.cc b/paddle/operators/cond_op.cc index 8c860676e06de5dac9570d2a6f7271ff451eebee..e333002bfd1ab40c62882f09cd207a12a0939648 100644 --- a/paddle/operators/cond_op.cc +++ b/paddle/operators/cond_op.cc @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/operators/cond_op.h" - #include "paddle/operators/gather.h" #include "paddle/operators/scatter.h" +#include "paddle/platform/device_context.h" namespace paddle { namespace operators { @@ -193,12 +193,15 @@ void CondOp::MergeDataFromSubnet(const framework::Scope& scope, } } -void CondOp::Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const { +void CondOp::Run(const Scope& scope, const platform::Place& place) const { + // get device context from pool + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto& dev_ctx = *pool.Get(place); + PrepareDataForSubnet(scope, dev_ctx); std::vector& sub_scopes = GetSubScopes(scope); for (int i = 0; i < BRANCH_NUM; ++i) { - sub_net_op_[i]->Run(*sub_scopes[i], dev_ctx); + sub_net_op_[i]->Run(*sub_scopes[i], place); } MergeDataFromSubnet(scope, dev_ctx); } diff --git a/paddle/operators/cond_op.h b/paddle/operators/cond_op.h index 93121fb31be287794249b5a62386d5a8dd268a0c..7dcdc47e0b2ff216bea92d083fe5897009384d39 100644 --- a/paddle/operators/cond_op.h +++ b/paddle/operators/cond_op.h @@ -78,7 +78,7 @@ class CondOp : public framework::OperatorBase { } void Run(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const override; + const platform::Place& place) const override; private: const int TRUE_BRANCH = 0; diff --git a/paddle/operators/conditional_block_op.cc b/paddle/operators/conditional_block_op.cc index 204be7d1e5385b7fdab54914bec216543e360cd3..3cae61a438431e72cb24d714c761676cc0c3a41f 100644 --- a/paddle/operators/conditional_block_op.cc +++ b/paddle/operators/conditional_block_op.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include #include "paddle/framework/executor.h" #include "paddle/framework/op_registry.h" @@ -51,7 +51,7 @@ class ConditionalBlockOp : public ConditionalOp { const framework::AttributeMap &attrs) : ConditionalOp(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &dev_place) const override { auto xs = InputTensors(scope); bool need_run = std::all_of( xs.begin(), xs.end(), @@ -65,8 +65,8 @@ class ConditionalBlockOp : public ConditionalOp { scopes->front() = &scope.NewScope(); auto &cur_scope = *scopes->front(); + framework::Executor exec(dev_place); auto *block = Attr("sub_block"); - framework::Executor exec(dev_ctx); exec.Run(*block->Program(), &cur_scope, block->ID(), false); } } @@ -104,7 +104,7 @@ class ConditionalBlockGradOp : public ConditionalOp { const framework::AttributeMap &attrs) : ConditionalOp(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &dev_place) const override { auto xs = this->InputTensors(scope); bool need_run = std::all_of( xs.begin(), xs.end(), @@ -116,21 +116,21 @@ class ConditionalBlockGradOp : public ConditionalOp { auto &scopes = scope_var->Get>(); framework::Scope &cur_scope = *scopes[0]; + framework::Executor exec(dev_place); auto *block = Attr("sub_block"); - framework::Executor exec(dev_ctx); exec.Run(*block->Program(), &cur_scope, block->ID(), false); - AssignLocalGradientToGlobal(dev_ctx, cur_scope, Inputs("Params"), + AssignLocalGradientToGlobal(dev_place, cur_scope, Inputs("Params"), Outputs(framework::GradVarName("Params"))); - AssignLocalGradientToGlobal(dev_ctx, cur_scope, Inputs("X"), + AssignLocalGradientToGlobal(dev_place, cur_scope, Inputs("X"), Outputs(framework::GradVarName("X"))); } } private: void AssignLocalGradientToGlobal( - const platform::DeviceContext &dev_ctx, const framework::Scope &cur_scope, + const platform::Place &place, const framework::Scope &cur_scope, const std::vector &p_names, const std::vector &pg_names) const { for (size_t i = 0; i < p_names.size(); ++i) { @@ -144,7 +144,7 @@ class ConditionalBlockGradOp : public ConditionalOp { auto assign = framework::OpRegistry::CreateOp( "assign", {{"X", {new_in_grad_name}}}, {{"Out", {out_grad_name}}}, framework::AttributeMap{}); - assign->Run(cur_scope, dev_ctx); + assign->Run(cur_scope, place); cur_scope.Rename(new_in_grad_name, in_grad_name); } } diff --git a/paddle/operators/conv_cudnn_op.cc b/paddle/operators/conv_cudnn_op.cc index 5b27ada55d737c31f8e65dc9b460a3a2ea11b869..84d9ce1973a4cccadcb8f78feaecbcaa9e7af312 100644 --- a/paddle/operators/conv_cudnn_op.cc +++ b/paddle/operators/conv_cudnn_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/conv_op.h" diff --git a/paddle/operators/conv_cudnn_op.cu.cc b/paddle/operators/conv_cudnn_op.cu.cc index 3da0a9001aafbb5b2c4b9a91c4527d9437ac38a1..0c5ed3e4e80304c6fd174975166804347feb18b1 100644 --- a/paddle/operators/conv_cudnn_op.cu.cc +++ b/paddle/operators/conv_cudnn_op.cu.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" @@ -36,7 +36,7 @@ class CudnnConvOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); auto* input = ctx.Input("Input"); auto* filter = ctx.Input("Filter"); auto* output = ctx.Output("Output"); @@ -130,7 +130,7 @@ class CudnnConvOpKernel : public framework::OpKernel { handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc, cudnn_output_desc, algo, &workspace_size_in_bytes)); // Allocate on GPU memory - platform::GPUPlace gpu = boost::get(ctx.GetPlace()); + platform::CUDAPlace gpu = boost::get(ctx.GetPlace()); cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes); // ------------------- cudnn conv forward --------------------- T alpha = 1.0f, beta = 0.0f; @@ -151,7 +151,7 @@ class CudnnConvGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); auto input = ctx.Input("Input"); auto filter = ctx.Input("Filter"); auto output_grad = ctx.Input(framework::GradVarName("Output")); @@ -277,7 +277,7 @@ class CudnnConvGradOpKernel : public framework::OpKernel { // ------------------- cudnn conv workspace --------------------- // Already on GPU void* cudnn_workspace = nullptr; - platform::GPUPlace gpu = boost::get(ctx.GetPlace()); + platform::CUDAPlace gpu = boost::get(ctx.GetPlace()); cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes); // ------------------- cudnn conv backward data --------------------- T alpha = 1.0f, beta = 0.0f; @@ -315,6 +315,7 @@ class CudnnConvGradOpKernel : public framework::OpKernel { } // namespace operators } // namespace paddle +// TODO(dzhwinter) : below register should be removed REGISTER_OP_CUDA_KERNEL(conv2d_cudnn, paddle::operators::CudnnConvOpKernel, paddle::operators::CudnnConvOpKernel); diff --git a/paddle/operators/conv_op.cc b/paddle/operators/conv_op.cc index abe82e124121a6c57d1c3ca7337804f5a4ab3d38..ad84524e1785e3b6b4586c83001852b6dba7afe8 100644 --- a/paddle/operators/conv_op.cc +++ b/paddle/operators/conv_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/conv_op.h" @@ -31,8 +31,6 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const { std::vector paddings = ctx->Attrs().Get>("paddings"); int groups = ctx->Attrs().Get("groups"); std::vector dilations = ctx->Attrs().Get>("dilations"); - int input_channels = in_dims[1]; - int output_channels = filter_dims[0]; PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5, "Conv intput should be 4-D or 5-D tensor."); @@ -45,11 +43,13 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ( paddings.size(), strides.size(), "Conv paddings dimension and Conv strides dimension should be the same."); - PADDLE_ENFORCE_EQ(input_channels, filter_dims[1] * groups, + + PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[1] * groups, "The number of input channels should be equal to filter " "channels * groups."); + PADDLE_ENFORCE_EQ( - output_channels % groups, 0, + filter_dims[0] % groups, 0, "The number of output channels should be divided by groups."); std::vector output_shape({in_dims[0], filter_dims[0]}); @@ -64,6 +64,7 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const { dilations[i], paddings[i], strides[i])); } ctx->SetOutputDim("Output", framework::make_ddim(output_shape)); + ctx->ShareLoD("Input", "Output"); } Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker) diff --git a/paddle/operators/conv_op.cu.cc b/paddle/operators/conv_op.cu.cc index 38615a8befab91633423b7cd8536253a0d049ac3..4f942444f3eb5584f07399b8d1b4d6a5087496d4 100644 --- a/paddle/operators/conv_op.cu.cc +++ b/paddle/operators/conv_op.cu.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/conv_op.h" diff --git a/paddle/operators/conv_op.h b/paddle/operators/conv_op.h index 83786e2329e7ae3c2908fdfdaeb1f79d19a53f47..fe3c0bc9302257d444c7431c40c8ab7e4c1fe0e7 100644 --- a/paddle/operators/conv_op.h +++ b/paddle/operators/conv_op.h @@ -62,12 +62,25 @@ class ConvOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override; + framework::OpKernelType GetExpectedKernelType( + const framework::OpKernelType& kernel) const override { + return framework::OpKernelType(kernel.data_type_, platform::CUDAPlace(0), + kernel.data_layout_, + framework::LibraryType::kCUDNN); + } }; class ConvOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override; + + framework::OpKernelType GetExpectedKernelType( + const framework::OpKernelType& kernel) const override { + return framework::OpKernelType(kernel.data_type_, platform::CUDAPlace(0), + kernel.data_layout_, + framework::LibraryType::kCUDNN); + } }; template diff --git a/paddle/operators/conv_shift_op.cc b/paddle/operators/conv_shift_op.cc index ac2f80625935e14189d27bf738e9b9985a7f42c2..106b68a0a0e787a0c9da2de924f4646c77b42b41 100644 --- a/paddle/operators/conv_shift_op.cc +++ b/paddle/operators/conv_shift_op.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/conv_shift_op.h" #include "paddle/framework/eigen.h" diff --git a/paddle/operators/conv_shift_op.cu b/paddle/operators/conv_shift_op.cu index f7ca82ce2635f9ef9d7e9a062d148448e61c163c..cf7abc196e1293ab1b998d1a8cb9c361a7c2d427 100644 --- a/paddle/operators/conv_shift_op.cu +++ b/paddle/operators/conv_shift_op.cu @@ -1,16 +1,16 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/conv_shift_op.h" #include "paddle/operators/math/math_function.h" diff --git a/paddle/operators/conv_shift_op.h b/paddle/operators/conv_shift_op.h index 1a70b38a0d8cb82ad1f818148306b7ec5f334744..6781d87ef0d99a0b0fc4747245920b6a38a33804 100644 --- a/paddle/operators/conv_shift_op.h +++ b/paddle/operators/conv_shift_op.h @@ -1,16 +1,16 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/op_registry.h" diff --git a/paddle/operators/conv_transpose_cudnn_op.cc b/paddle/operators/conv_transpose_cudnn_op.cc index 8980ff91f5d7c585d9ce0ce62cfb90f47ea86ec6..2e5333a265f2f59f31c651b8bb080599ec6e31a4 100644 --- a/paddle/operators/conv_transpose_cudnn_op.cc +++ b/paddle/operators/conv_transpose_cudnn_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/conv_transpose_op.h" diff --git a/paddle/operators/conv_transpose_cudnn_op.cu.cc b/paddle/operators/conv_transpose_cudnn_op.cu.cc index f0297f6c40c132c28b50184997d657451f26362b..fc37776ba1ed35aa6b2523eb593e9713cfcc54eb 100644 --- a/paddle/operators/conv_transpose_cudnn_op.cu.cc +++ b/paddle/operators/conv_transpose_cudnn_op.cu.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" @@ -35,7 +35,7 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); auto* input = ctx.Input("Input"); auto* filter = ctx.Input("Filter"); auto* output = ctx.Output("Output"); @@ -100,7 +100,7 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel { cudnn_output_desc, algo, &workspace_size_in_bytes)); // Allocate on GPU memory - platform::GPUPlace gpu = boost::get(ctx.GetPlace()); + platform::CUDAPlace gpu = boost::get(ctx.GetPlace()); cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes); // ------------------- cudnn conv transpose forward --------------------- @@ -120,7 +120,7 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); auto input = ctx.Input("Input"); auto filter = ctx.Input("Filter"); auto output_grad = ctx.Input(framework::GradVarName("Output")); @@ -201,7 +201,7 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel { // ------------------- cudnn conv workspace --------------------- // Already on GPU void* cudnn_workspace = nullptr; - platform::GPUPlace gpu = boost::get(ctx.GetPlace()); + platform::CUDAPlace gpu = boost::get(ctx.GetPlace()); cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes); // ------------------- cudnn conv backward data --------------------- // FIXME(typhoonzero): template type T may not be the same as cudnn call. diff --git a/paddle/operators/conv_transpose_op.cc b/paddle/operators/conv_transpose_op.cc index 5e24fc4b2c8b479caac417c957033f7552e1c3f0..74636d138f1e40474a1cc5453609dafe14fcaaab 100644 --- a/paddle/operators/conv_transpose_op.cc +++ b/paddle/operators/conv_transpose_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/conv_transpose_op.h" diff --git a/paddle/operators/conv_transpose_op.cu.cc b/paddle/operators/conv_transpose_op.cu.cc index b91ebd7922f2e101df8d6ef5892a62ec5a10cf99..f1d827c606283440debb9a0edb25168816a3a08c 100644 --- a/paddle/operators/conv_transpose_op.cu.cc +++ b/paddle/operators/conv_transpose_op.cu.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/conv_transpose_op.h" diff --git a/paddle/operators/cos_sim_op.cc b/paddle/operators/cos_sim_op.cc index a4d4a78d3200259403695a73ed9cfabe9baf8876..9019a1edb379be4007e38d3c0dc71feae23ae4e8 100644 --- a/paddle/operators/cos_sim_op.cc +++ b/paddle/operators/cos_sim_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/cos_sim_op.h" diff --git a/paddle/operators/cos_sim_op.cu b/paddle/operators/cos_sim_op.cu index 1cb01f5945f691747bac609ca4a93e2d15cde5bf..9e5d1b6e4f0b6e482edd96df93d535e05dba3bc6 100644 --- a/paddle/operators/cos_sim_op.cu +++ b/paddle/operators/cos_sim_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/cos_sim_op.h" diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h index fecb5a79b2397dd73d991a1a87efcf84d60ef882..eadcca55f9bfc3e59f329df8ff419ad4c5a29007 100644 --- a/paddle/operators/cos_sim_op.h +++ b/paddle/operators/cos_sim_op.h @@ -1,31 +1,27 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once -#include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/math/cos_sim_functor.h" +#include "paddle/operators/math/math_function.h" +#include "paddle/platform/for_range.h" namespace paddle { namespace operators { using Tensor = framework::Tensor; -template -using EigenMatrix = framework::EigenMatrix; -template -using EigenVector = framework::EigenVector; template class CosSimKernel : public framework::OpKernel { @@ -41,28 +37,25 @@ class CosSimKernel : public framework::OpKernel { out_x_norm->mutable_data(context.GetPlace()); out_y_norm->mutable_data(context.GetPlace()); - // convert Tensor to Eigen Tensor int rows_x = in_x->dims()[0]; int rows_y = in_y->dims()[0]; - auto x = EigenMatrix::Reshape(*in_x, 1); - auto y = EigenMatrix::Reshape(*in_y, 1); - auto z = EigenVector::Flatten(*out_z); - auto x_norm = EigenVector::Flatten(*out_x_norm); - auto y_norm = EigenVector::Flatten(*out_y_norm); - // compute - auto& place = - *context.template device_context().eigen_device(); - auto row_along = Eigen::array({{1}}); - x_norm.device(place) = x.square().sum(row_along).sqrt(); - y_norm.device(place) = y.square().sum(row_along).sqrt(); + int cols = framework::product(in_x->dims()) / rows_x; + if (rows_x == rows_y) { - auto xy = (x * y).sum(Eigen::array({{1}})); - z.device(place) = xy / x_norm / y_norm; + math::CosSimFunctor functor( + in_x->data(), in_y->data(), out_x_norm->data(), + out_y_norm->data(), out_z->data(), cols); + platform::ForRange for_range( + static_cast(context.device_context()), rows_x); + for_range(functor); } else { - Eigen::DSizes bcast(rows_x, 1); - auto xy = (x * y.broadcast(bcast)).sum(row_along); - z.device(place) = xy / x_norm / y_norm.broadcast(bcast); + math::CosSimFunctor functor( + in_x->data(), in_y->data(), out_x_norm->data(), + out_y_norm->data(), out_z->data(), cols); + platform::ForRange for_range( + static_cast(context.device_context()), rows_x); + for_range(functor); } } }; @@ -81,62 +74,54 @@ class CosSimGradKernel : public framework::OpKernel { auto* out_grad_y = context.Output(framework::GradVarName("Y")); auto* in_grad_z = context.Input(framework::GradVarName("Out")); - // convert Tensor to Eigen Tensor - auto x = EigenMatrix::Reshape(*in_x, 1); - auto y = EigenMatrix::Reshape(*in_y, 1); - auto z = EigenMatrix::Reshape(*in_z, 1); - auto x_norm = EigenMatrix::Reshape(*in_x_norm, 1); - auto y_norm = EigenMatrix::Reshape(*in_y_norm, 1); - auto dz = EigenMatrix::Reshape(*in_grad_z, 1); - // compute gradident int rows_x = in_x->dims()[0]; int rows_y = in_y->dims()[0]; int cols = framework::product(in_x->dims()) / rows_x; - Eigen::DSizes bcast_cols(1, cols); - auto z_bcast = z.broadcast(bcast_cols); - auto dz_bcast = dz.broadcast(bcast_cols); - auto x_snorm_bcast = x_norm.square().eval().broadcast(bcast_cols); - auto& place = - *context.template device_context().eigen_device(); + if (rows_x == rows_y) { - auto y_snorm_bcast = y_norm.square().eval().broadcast(bcast_cols); - auto norm_prod_bcast = (x_norm * y_norm).eval().broadcast(bcast_cols); - // compute dx if (out_grad_x) { - out_grad_x->mutable_data(context.GetPlace()); - auto dx = EigenMatrix::Reshape(*out_grad_x, 1); - auto grad = y / norm_prod_bcast - z_bcast * x / x_snorm_bcast; - dx.device(place) = dz_bcast * grad; + math::CosSimGradFunctor functor( + in_x_norm->data(), in_y_norm->data(), in_x->data(), + in_y->data(), in_z->data(), in_grad_z->data(), + out_grad_x->mutable_data(context.GetPlace()), cols); + platform::ForRange for_range( + static_cast(context.device_context()), + rows_x); + for_range(functor); } - // compute dy if (out_grad_y) { - out_grad_y->mutable_data(context.GetPlace()); - auto dy = EigenMatrix::Reshape(*out_grad_y, 1); - auto grad = x / norm_prod_bcast - z_bcast * y / y_snorm_bcast; - dy.device(place) = dz_bcast * grad; + math::CosSimGradFunctor functor( + in_y_norm->data(), in_x_norm->data(), in_y->data(), + in_x->data(), in_z->data(), in_grad_z->data(), + out_grad_y->mutable_data(context.GetPlace()), cols); + platform::ForRange for_range( + static_cast(context.device_context()), + rows_x); + for_range(functor); } } else { - Eigen::DSizes bcast_rows(rows_x, 1); - Eigen::DSizes bcast_rows_cols(rows_x, cols); - auto y_bcast = y.broadcast(bcast_rows); - auto y_snorm_bcast = y_norm.square().eval().broadcast(bcast_rows_cols); - auto norm_prod_bcast = (x_norm * y_norm.eval().broadcast(bcast_rows)) - .eval() - .broadcast(bcast_cols); - // compute dx if (out_grad_x) { - out_grad_x->mutable_data(context.GetPlace()); - auto dx = EigenMatrix::Reshape(*out_grad_x, 1); - auto grad = y_bcast / norm_prod_bcast - z_bcast * x / x_snorm_bcast; - dx.device(place) = dz_bcast * grad; + math::CosSimDxFunctor functor( + in_x_norm->data(), in_y_norm->data(), in_x->data(), + in_y->data(), in_z->data(), in_grad_z->data(), + out_grad_x->mutable_data(context.GetPlace()), cols); + platform::ForRange for_range( + static_cast(context.device_context()), + rows_x); + for_range(functor); } - // compute dy if (out_grad_y) { out_grad_y->mutable_data(context.GetPlace()); - auto dy = EigenVector::Flatten(*out_grad_y); - auto grad = x / norm_prod_bcast - z_bcast * y_bcast / y_snorm_bcast; - dy.device(place) = (dz_bcast * grad).sum(Eigen::array({{0}})); + math::SetConstant set_zero; + auto& dev_ctx = context.template device_context(); + set_zero(dev_ctx, out_grad_y, static_cast(0)); + + math::CosSimDyFunctor functor; + functor(dev_ctx, in_x_norm->data(), in_y_norm->data(), + in_x->data(), in_y->data(), in_z->data(), + in_grad_z->data(), static_cast(rows_x), + static_cast(cols), out_grad_y->data()); } } } diff --git a/paddle/operators/crf_decoding_op.cc b/paddle/operators/crf_decoding_op.cc index 27d0871f82beed4ceb3a4439be097a580631d4c6..024e1d061a5b2eabc27110d5379cb8226a104079 100644 --- a/paddle/operators/crf_decoding_op.cc +++ b/paddle/operators/crf_decoding_op.cc @@ -120,12 +120,18 @@ class CRFDecodingOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Emission")->type()), ctx.device_context()); } + + framework::OpKernelType GetExpectedKernelType( + const framework::OpKernelType& actual_kernel_type) const override { + return framework::OpKernelType(actual_kernel_type.data_type_, + platform::CPUPlace()); + } }; } // namespace operators } // namespace paddle diff --git a/paddle/operators/crop_op.cc b/paddle/operators/crop_op.cc index 87fcab4cca669a356ced8951fbdc3c3ee3a24f3d..310e351443112c340054cf092cd2443b309ec49c 100644 --- a/paddle/operators/crop_op.cc +++ b/paddle/operators/crop_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/crop_op.h" #include diff --git a/paddle/operators/crop_op.cu b/paddle/operators/crop_op.cu index 90fd83ca10b750896a9fe144d3c30fabb2f54e0a..bba5db4c6ce682cb00482e35fa1e340aba83e37f 100644 --- a/paddle/operators/crop_op.cu +++ b/paddle/operators/crop_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/crop_op.h" diff --git a/paddle/operators/crop_op.h b/paddle/operators/crop_op.h index d531a19c783d2768d24142bb7b974ccfc2b39350..69d1a92977250b4e8a64b47ac66444724fbc53f6 100644 --- a/paddle/operators/crop_op.h +++ b/paddle/operators/crop_op.h @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 CropdleCropdle Authors. All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc index 1ab7c0a06f85f332b290cb6cac82d0cfbe8f3242..fe39cb481aa63fa401603d97778b337282511ab5 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/cross_entropy_op.cc @@ -51,7 +51,7 @@ class CrossEntropyOp : public framework::OperatorWithKernel { protected: // Explicitly set that the data type of computation kernel of cross_entropy // is determined by its input "X". - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), @@ -101,7 +101,7 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel { protected: // Explicitly set that the data type of computation kernel of cross_entropy // is determined by its input "X". - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), @@ -114,15 +114,15 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { CrossEntropyOpMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", - "(Tensor, default Tensor), a 2-D tensor with shape N x D, " - "where N is the batch size and D is the number of classes. " + "(Tensor, default Tensor), a 2-D tensor with shape [N x D]," + " where N is the batch size and D is the number of classes. " "This input is a probability computed by the previous operator, " "which is almost always the result of a softmax operator."); AddInput("Label", "(Tensor), the ground truth which is a 2-D tensor. When " "soft_label is set to false, Label is a Tensor with shape " "[N x 1]. When soft_label is set to true, Label is a " - "Tensor with shape [N x K]."); + "Tensor with shape [N x D]."); AddOutput("Y", "(Tensor, default Tensor), a 2-D tensor with shape " "[N x 1]. The cross entropy loss."); diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu index 05469645880fa466a2a3324ad1b7a8b9d681c440..3b04894e6ccb08c13e2d24bb38196fdc7935bf9e 100644 --- a/paddle/operators/cross_entropy_op.cu +++ b/paddle/operators/cross_entropy_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/cross_entropy_op.h" diff --git a/paddle/operators/decayed_adagrad_op.cu b/paddle/operators/decayed_adagrad_op.cu index 282b90f275ad1542d5941e001dbf646348fc01b6..7bc8161f2339572c2a9284f865846b9b7e594354 100644 --- a/paddle/operators/decayed_adagrad_op.cu +++ b/paddle/operators/decayed_adagrad_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/decayed_adagrad_op.h" diff --git a/paddle/operators/detail/recv_impl.cc b/paddle/operators/detail/recv_impl.cc index 517a1946a0c25081b20c320f4104c81503a4249e..319404e56a5f3c407f313991240bbbb85fd39a2a 100644 --- a/paddle/operators/detail/recv_impl.cc +++ b/paddle/operators/detail/recv_impl.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "send_recv_impl.h" @@ -21,14 +21,9 @@ namespace detail { Status SendRecvServerImpl::SendVariable(ServerContext *context, const VariableMessage *in_var, VoidMessage *out_var) { - // TODO(typhoonzero): support different variable types. - std::istringstream iss(in_var->serialized()); - framework::LoDTensor t; - framework::DeserializeFromStream(iss, &t); - TensorWithName tensor_with_name = - std::make_pair(in_var->varname(), std::move(t)); - - var_recv_queue_.Push(std::move(tensor_with_name)); + MessageWithName msg_with_name = + std::make_pair(in_var->varname(), std::move(*in_var)); + var_recv_queue_.Push(std::move(msg_with_name)); return Status::OK; } @@ -37,14 +32,8 @@ Status SendRecvServerImpl::GetVariable(ServerContext *context, VariableMessage *out_var) { std::string get_var_name = in_var->varname(); auto *var = scope_->FindVar(get_var_name); - auto tensor = var->Get(); - std::ostringstream oss; - framework::SerializeToStream(oss, tensor, platform::CPUDeviceContext()); - std::string *varname = out_var->mutable_varname(); - *varname = get_var_name; - std::string *serialized = out_var->mutable_serialized(); - *serialized = oss.str(); + SerializeToMessage(get_var_name, var, platform::CPUDeviceContext(), out_var); return Status::OK; } diff --git a/paddle/operators/detail/safe_ref.h b/paddle/operators/detail/safe_ref.h index b71af17309f9f46b5c87f0f479d4e03443fa7f93..ff2a156f3d0e3c5c55354375ca81adf433a49686 100644 --- a/paddle/operators/detail/safe_ref.h +++ b/paddle/operators/detail/safe_ref.h @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/detail/send_impl.cc b/paddle/operators/detail/send_impl.cc index d7165e13db961f7719139b1a40211d6a2ca67a9f..ae85cf2cec2cd8e046c0c7fd3408f2212f225819 100644 --- a/paddle/operators/detail/send_impl.cc +++ b/paddle/operators/detail/send_impl.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "send_recv_impl.h" @@ -27,14 +27,8 @@ bool RPCClient::SendVariable(const framework::Scope& scope, auto ctx = platform::CPUDeviceContext(); auto* var = scope.FindVar(inname); PADDLE_ENFORCE(var); - // TODO(typhoonzero): support SelectedRows - PADDLE_ENFORCE(var->IsType(), - "Only support LoDTensor, %s has wrong type", inname); - const framework::LoDTensor& tensor = var->Get(); - std::ostringstream oss; - framework::SerializeToStream(oss, tensor, ctx); - msg.set_varname(inname); - msg.set_serialized(oss.str()); + SerializeToMessage(inname, var, ctx, &msg); + Status status = stub_->SendVariable(&context, msg, &out_msg); if (!status.ok()) { LOG(ERROR) << "gRPC error: " << status.error_message(); @@ -50,19 +44,15 @@ bool RPCClient::GetVariable(const framework::Scope& scope, call_msg.set_varname(outname); auto ctx = platform::CPUDeviceContext(); Status status = stub_->GetVariable(&context, call_msg, &ret_msg); + auto* outvar = scope.FindVar(outname); if (!status.ok()) { LOG(ERROR) << "gRPC error: " << status.error_message(); return false; } std::istringstream iss(ret_msg.serialized()); + DeserializeFromMessage(ret_msg, ctx, outvar); - framework::LoDTensor ret_tensor; - framework::DeserializeFromStream(iss, &ret_tensor); - auto* outvar = scope.FindVar(outname); - framework::LoDTensor* out_tensor = outvar->GetMutable(); - // FIXME(typhoonzero): do not copy. - framework::CopyFrom(ret_tensor, ctx.GetPlace(), ctx, out_tensor); return true; } diff --git a/paddle/operators/detail/send_recv.proto b/paddle/operators/detail/send_recv.proto index ce729908062ad442e66cc00001e14ceb6f268560..f141c755ce14ef540aeab32c11c289179aff3f8c 100644 --- a/paddle/operators/detail/send_recv.proto +++ b/paddle/operators/detail/send_recv.proto @@ -1,19 +1,17 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. Licensed under +the Apache License, Version 2.0 (the "License"); you may not use this file +except in compliance with the License. +You may obtain a copy of the License at - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ syntax = "proto3"; - package sendrecv; service SendRecvService { @@ -29,12 +27,18 @@ service SendRecvService { // VariableMessage is serialized paddle variable message. // It can be: -// Tensor // LoDTensor // SelectedRows +enum VarType { + LOD_TENSOR = 0; + SELECTED_ROWS = 1; +} + message VariableMessage { string varname = 1; - bytes serialized = 2; + // TODO(Yancey1989): reference framework::proto::VarDesc::VarType + VarType type = 2; + bytes serialized = 3; } message VoidMessage {} diff --git a/paddle/operators/detail/send_recv_impl.h b/paddle/operators/detail/send_recv_impl.h index eec9dd38d188247cba4da2a377038a28c847e40e..1fe54f1f0536aed7d41bbdeeca076534abafe98d 100644 --- a/paddle/operators/detail/send_recv_impl.h +++ b/paddle/operators/detail/send_recv_impl.h @@ -1,23 +1,23 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once -#include "paddle/framework/data_type.h" #include "paddle/framework/lod_tensor.h" #include "paddle/framework/scope.h" #include "paddle/framework/selected_rows.h" +#include "paddle/framework/var_type.h" #include "paddle/operators/detail/simple_block_queue.h" #include "paddle/operators/detail/send_recv.grpc.pb.h" @@ -44,7 +44,7 @@ namespace paddle { namespace operators { namespace detail { -typedef std::pair TensorWithName; +typedef std::pair MessageWithName; class SendRecvServerImpl final : public SendRecvService::Service { public: @@ -60,11 +60,13 @@ class SendRecvServerImpl final : public SendRecvService::Service { void Done(); void SetScope(framework::Scope *scope) { scope_ = scope; }; - const TensorWithName Get() { return this->var_recv_queue_.Pop(); } + const MessageWithName Get() { return this->var_recv_queue_.Pop(); } + + void Push(const MessageWithName &msg) { this->var_recv_queue_.Push(msg); } private: // received variable from RPC, operators fetch variable from this queue. - SimpleBlockQueue var_recv_queue_; + SimpleBlockQueue var_recv_queue_; framework::Scope *scope_; // condition of the sub program std::mutex mutex_; @@ -87,6 +89,53 @@ class RPCClient { std::unique_ptr stub_; }; +inline void SerializeToMessage(const std::string &name, + const framework::Variable *var, + const platform::DeviceContext &ctx, + VariableMessage *msg) { + msg->set_varname(name); + std::ostringstream oss; + switch (framework::ToVarType(var->Type())) { + case framework::proto::VarDesc_VarType_LOD_TENSOR: + msg->set_type(sendrecv::VarType::LOD_TENSOR); + framework::SerializeToStream(oss, var->Get(), ctx); + break; + case framework::proto::VarDesc_VarType_SELECTED_ROWS: + msg->set_type(sendrecv::VarType::SELECTED_ROWS); + framework::SerializeToStream(oss, var->Get(), + ctx); + break; + default: { + PADDLE_THROW("Serialize does not support type: %s", + typeid(var->Type()).name()); + break; + } + } + msg->set_serialized(oss.str()); +} + +inline void DeserializeFromMessage(const VariableMessage &msg, + const platform::DeviceContext &ctx, + framework::Variable *var) { + using namespace paddle::framework::proto; + std::istringstream iss(msg.serialized()); + switch (msg.type()) { + case sendrecv::VarType::LOD_TENSOR: + DeserializeFromStream(iss, var->GetMutable(), ctx); + break; + case sendrecv::VarType::SELECTED_ROWS: { + DeserializeFromStream(iss, var->GetMutable(), + ctx); + break; + } + default: { + PADDLE_THROW("Deserialize does not support type: %s", + typeid(var->Type()).name()); + break; + } + } +} + } // namespace detail } // namespace operators } // namespace paddle diff --git a/paddle/operators/detail/simple_block_queue.h b/paddle/operators/detail/simple_block_queue.h index 44899217579532af2c1d2e6074ec0e08231e7b86..c7f5ff4b5f494ce80b4ee792afb44841e9c8a2f2 100644 --- a/paddle/operators/detail/simple_block_queue.h +++ b/paddle/operators/detail/simple_block_queue.h @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/detail/strided_memcpy.h b/paddle/operators/detail/strided_memcpy.h index 068c82f399316a1587d7322d8dab75823656800e..9ed524d4dcf7f8bd4607281ade34e9d56f409085 100644 --- a/paddle/operators/detail/strided_memcpy.h +++ b/paddle/operators/detail/strided_memcpy.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/ddim.h" @@ -35,7 +35,7 @@ struct StridedMemcpyFunctor { memory::Copy(cpu_place, dst, cpu_place, src, sizeof(T) * dst_dim.head); } else { #ifdef PADDLE_WITH_CUDA - auto& gpu_place = boost::get(place); + auto& gpu_place = boost::get(place); auto& cuda_ctx = reinterpret_cast(dev_ctx); memory::Copy(gpu_place, dst, gpu_place, src, sizeof(T) * dst_dim.head, diff --git a/paddle/operators/detection_output_op.cc b/paddle/operators/detection_output_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..ea44cd32678d7e8a5836c1886cf9c1b4961970aa --- /dev/null +++ b/paddle/operators/detection_output_op.cc @@ -0,0 +1,89 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +Indicesou may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/detection_output_op.h" +namespace paddle { +namespace operators { + +class DetectionOutputOpMaker : public framework::OpProtoAndCheckerMaker { + public: + DetectionOutputOpMaker(OpProto* proto, OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Loc", + "(Tensor) The input tensor of detection_output operator." + "The input predict locations" + "The format of input tensor is kNCHW. Where K is priorbox point " + "numbers," + "N is How many boxes are there on each point, " + "C is 4, H and W both are 1."); + AddInput("Conf", + "(Tensor) The input tensor of detection_output operator." + "The input priorbox confidence." + "The format of input tensor is kNCHW. Where K is priorbox point " + "numbers," + "N is How many boxes are there on each point, " + "C is the number of classes, H and W both are 1."); + AddInput("PriorBox", + "(Tensor) The input tensor of detection_output operator." + "The format of input tensor is the position and variance " + "of the boxes"); + AddOutput("Out", + "(Tensor) The output tensor of detection_output operator."); + AddAttr("background_label_id", "(int), The background class index."); + AddAttr("num_classes", "(int), The number of the classification."); + AddAttr("nms_threshold", + "(float), The Non-maximum suppression threshold."); + AddAttr("confidence_threshold", + "(float), The classification confidence threshold."); + AddAttr("top_k", "(int), The bbox number kept of the layer’s output."); + AddAttr("nms_top_k", + "(int), The bbox number kept of the NMS’s output."); + AddComment(R"DOC( + detection output for SSD(single shot multibox detector) + Apply the NMS to the output of network and compute the predict + bounding box location. The output’s shape of this layer could + be zero if there is no valid bounding box. + )DOC"); + } +}; + +class DetectionOutputOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Loc"), + "Input(X) of DetectionOutputOp" + "should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Conf"), + "Input(X) of DetectionOutputOp" + "should not be null."); + PADDLE_ENFORCE(ctx->HasInput("PriorBox"), + "Input(X) of DetectionOutputOp" + "should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of DetectionOutputOp should not be null."); + std::vector output_shape({1, 7}); + ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_WITHOUT_GRADIENT(detection_output, ops::DetectionOutputOp, + ops::DetectionOutputOpMaker); +REGISTER_OP_CPU_KERNEL( + detection_output, + ops::DetectionOutputKernel, + ops::DetectionOutputKernel); diff --git a/paddle/operators/detection_output_op.cu.cc b/paddle/operators/detection_output_op.cu.cc new file mode 100644 index 0000000000000000000000000000000000000000..4a6560e0492c559afd06e5152c34fab545b7ce61 --- /dev/null +++ b/paddle/operators/detection_output_op.cu.cc @@ -0,0 +1,21 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +Indicesou may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/detection_output_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + detection_output, + ops::DetectionOutputKernel, + ops::DetectionOutputKernel); diff --git a/paddle/operators/detection_output_op.h b/paddle/operators/detection_output_op.h new file mode 100644 index 0000000000000000000000000000000000000000..f8abd5b6406f05747b87fcfd464baeb705a7f7f2 --- /dev/null +++ b/paddle/operators/detection_output_op.h @@ -0,0 +1,167 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +Indicesou may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/op_registry.h" +#include "paddle/framework/tensor.h" +#include "paddle/operators/math/detection_util.h" +#include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/softmax.h" +#include "paddle/operators/strided_memcpy.h" +namespace paddle { +namespace operators { +template +inline void transpose_fun(const framework::ExecutionContext& context, + const framework::Tensor& src, + framework::Tensor* dst) { + int input_nums = src.dims()[0]; + int offset = 0; + for (int j = 0; j < input_nums; ++j) { + framework::Tensor in_p_tensor = src.Slice(j, j + 1); + std::vector shape_vec( + {in_p_tensor.dims()[0], in_p_tensor.dims()[1], in_p_tensor.dims()[3], + in_p_tensor.dims()[4], in_p_tensor.dims()[2]}); + framework::DDim shape(framework::make_ddim(shape_vec)); + framework::Tensor in_p_tensor_transpose; + in_p_tensor_transpose.mutable_data(shape, context.GetPlace()); + std::vector shape_axis({0, 1, 3, 4, 2}); + math::Transpose trans5; + trans5(context.template device_context(), in_p_tensor, + &in_p_tensor_transpose, shape_axis); + auto dst_stride = framework::stride(dst->dims()); + auto src_stride = framework::stride(in_p_tensor_transpose.dims()); + StridedMemcpy(context.device_context(), in_p_tensor_transpose.data(), + src_stride, in_p_tensor_transpose.dims(), dst_stride, + dst->data() + offset); + offset += in_p_tensor_transpose.dims()[4] * src_stride[4]; + } +} +template +class DetectionOutputKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + const framework::Tensor* in_loc = context.Input("Loc"); + const framework::Tensor* in_conf = context.Input("Conf"); + const framework::Tensor* in_priorbox = + context.Input("PriorBox"); + auto* out = context.Output("Out"); + int num_classes = context.template Attr("num_classes"); + int top_k = context.template Attr("top_k"); + int nms_top_k = context.template Attr("nms_top_k"); + int background_label_id = context.template Attr("background_label_id"); + float nms_threshold = context.template Attr("nms_threshold"); + float confidence_threshold = + context.template Attr("confidence_threshold"); + size_t batch_size = in_conf->dims()[1]; + int conf_sum_size = in_conf->numel(); + // for softmax + std::vector conf_shape_softmax_vec( + {conf_sum_size / num_classes, num_classes}); + framework::DDim conf_shape_softmax( + framework::make_ddim(conf_shape_softmax_vec)); + // for knchw => nhwc + std::vector loc_shape_vec({1, in_loc->dims()[1], in_loc->dims()[3], + in_loc->dims()[4], + in_loc->dims()[2] * in_loc->dims()[0]}); + std::vector conf_shape_vec( + {1, in_conf->dims()[1], in_conf->dims()[3], in_conf->dims()[4], + in_conf->dims()[2] * in_conf->dims()[0]}); + framework::DDim loc_shape(framework::make_ddim(loc_shape_vec)); + framework::DDim conf_shape(framework::make_ddim(conf_shape_vec)); + framework::Tensor loc_tensor; + framework::Tensor conf_tensor; + loc_tensor.mutable_data(loc_shape, context.GetPlace()); + conf_tensor.mutable_data(conf_shape, context.GetPlace()); + // for cpu + framework::Tensor loc_cpu; + framework::Tensor conf_cpu; + framework::Tensor priorbox_cpu; + const T* priorbox_data = in_priorbox->data(); + transpose_fun(context, *in_loc, &loc_tensor); + transpose_fun(context, *in_conf, &conf_tensor); + conf_tensor.Resize(conf_shape_softmax); + math::SoftmaxFunctor()( + context.template device_context(), &conf_tensor, + &conf_tensor); + T* loc_data = loc_tensor.data(); + T* conf_data = conf_tensor.data(); + if (platform::is_gpu_place(context.GetPlace())) { + loc_cpu.mutable_data(loc_tensor.dims(), platform::CPUPlace()); + framework::CopyFrom(loc_tensor, platform::CPUPlace(), + context.device_context(), &loc_cpu); + loc_data = loc_cpu.data(); + conf_cpu.mutable_data(conf_tensor.dims(), platform::CPUPlace()); + framework::CopyFrom(conf_tensor, platform::CPUPlace(), + context.device_context(), &conf_cpu); + conf_data = conf_cpu.data(); + priorbox_cpu.mutable_data(in_priorbox->dims(), platform::CPUPlace()); + framework::CopyFrom(*in_priorbox, platform::CPUPlace(), + context.device_context(), &priorbox_cpu); + priorbox_data = priorbox_cpu.data(); + } + // get decode bboxes + size_t num_priors = in_priorbox->numel() / 8; + std::vector>> all_decoded_bboxes; + for (size_t n = 0; n < batch_size; ++n) { + std::vector> decoded_bboxes; + for (size_t i = 0; i < num_priors; ++i) { + size_t prior_offset = i * 8; + size_t loc_pred_offset = n * num_priors * 4 + i * 4; + std::vector> prior_bbox_vec; + math::GetBBoxFromPriorData(priorbox_data + prior_offset, 1, + prior_bbox_vec); + std::vector> prior_bbox_var; + math::GetBBoxVarFromPriorData(priorbox_data + prior_offset, 1, + prior_bbox_var); + std::vector loc_pred_data; + for (size_t j = 0; j < 4; ++j) + loc_pred_data.push_back(*(loc_data + loc_pred_offset + j)); + math::BBox bbox = math::DecodeBBoxWithVar( + prior_bbox_vec[0], prior_bbox_var[0], loc_pred_data); + decoded_bboxes.push_back(bbox); + } + all_decoded_bboxes.push_back(decoded_bboxes); + } + std::vector>> all_indices; + int num_kept = math::GetDetectionIndices( + conf_data, num_priors, num_classes, background_label_id, batch_size, + confidence_threshold, nms_top_k, nms_threshold, top_k, + all_decoded_bboxes, &all_indices); + + if (num_kept <= 0) { + std::vector out_shape_vec({0, 0}); + framework::DDim out_shape(framework::make_ddim(out_shape_vec)); + out->Resize(out_shape); + return; + } + std::vector out_shape_vec({num_kept, 7}); + framework::DDim out_shape(framework::make_ddim(out_shape_vec)); + out->mutable_data(out_shape, context.GetPlace()); + framework::Tensor out_cpu; + T* out_data = out->data(); + if (platform::is_gpu_place(context.GetPlace())) { + out_cpu.mutable_data(out->dims(), platform::CPUPlace()); + out_data = out_cpu.data(); + } + math::GetDetectionOutput(conf_data, num_kept, num_priors, num_classes, + batch_size, all_indices, all_decoded_bboxes, + out_data); + if (platform::is_gpu_place(context.GetPlace())) { + framework::CopyFrom(out_cpu, platform::CUDAPlace(), + context.device_context(), out); + } + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/dropout_op.cc b/paddle/operators/dropout_op.cc index c4bee44e3e5a16334fb9070165eab5c7cdf0141c..35cb18797ff66cb87a6658e73ce02b0bfae29baa 100644 --- a/paddle/operators/dropout_op.cc +++ b/paddle/operators/dropout_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/dropout_op.h" @@ -25,8 +25,6 @@ class DropoutOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); - PADDLE_ENFORCE_GE(ctx->Attrs().Get("dropout_prob"), 0); - PADDLE_ENFORCE_LE(ctx->Attrs().Get("dropout_prob"), 1); auto x_dims = ctx->GetInputDim("X"); ctx->SetOutputDim("Out", x_dims); @@ -47,7 +45,11 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker { AddOutput("Mask", "The random sampled dropout mask.").AsIntermediate(); AddAttr("dropout_prob", "Probability of setting units to zero.") - .SetDefault(.5f); + .SetDefault(.5f) + .AddCustomChecker([](const float& drop_p) { + PADDLE_ENFORCE(drop_p >= 0.0f && drop_p <= 1.0f, + "'dropout_prob' must be between 0.0 and 1.0."); + }); AddAttr("is_test", "True if in test phase.").SetDefault(false); AddAttr("seed", "Dropout random seed.").SetDefault(0); @@ -78,8 +80,6 @@ class DropoutOpGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), "Input(Out@GRAD) must not be null."); - PADDLE_ENFORCE_GE(ctx->Attrs().Get("dropout_prob"), 0); - PADDLE_ENFORCE_LE(ctx->Attrs().Get("dropout_prob"), 1); auto x_dims = ctx->GetInputDim("X"); auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); PADDLE_ENFORCE_EQ(x_dims, out_dims, diff --git a/paddle/operators/dropout_op.cu b/paddle/operators/dropout_op.cu index c31d2195e95b116451b0f620f6582f65c0dae706..c56930336e865079f1b96df0f35b0a051fe63a27 100644 --- a/paddle/operators/dropout_op.cu +++ b/paddle/operators/dropout_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include @@ -30,16 +30,15 @@ struct MaskGenerator { __host__ __device__ MaskGenerator(AttrType dropout_prob, int seed) : dropout_prob(dropout_prob), seed(seed) {} - __host__ __device__ T operator()(const unsigned int n) const { + inline __host__ __device__ T operator()(const unsigned int n) const { thrust::minstd_rand rng; rng.seed(seed); thrust::uniform_real_distribution dist(0, 1); rng.discard(n); if (dist(rng) < dropout_prob) { return static_cast(0); - } else { - return static_cast(1); } + return static_cast(1); } }; diff --git a/paddle/operators/dropout_op.h b/paddle/operators/dropout_op.h index 9f6c4212d4f834bbb2d1c65c836f3d3d0f3e0c96..c90b8d277eb78048c001d36a367287146b51c636 100644 --- a/paddle/operators/dropout_op.h +++ b/paddle/operators/dropout_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include diff --git a/paddle/operators/elementwise_add_op.cc b/paddle/operators/elementwise_add_op.cc index b6bd794a74665cef546347015be25ab989e852b2..70b7c9f2ec11bf8ad56a24324a53792955edc77d 100644 --- a/paddle/operators/elementwise_add_op.cc +++ b/paddle/operators/elementwise_add_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/elementwise_add_op.h" #include "paddle/operators/elementwise_op.h" diff --git a/paddle/operators/elementwise_add_op.cu b/paddle/operators/elementwise_add_op.cu index 78642bb4246e7328dd3e2d902aca88615d598ddf..641cea323acee549898cb6f0245ccac4c069ce32 100644 --- a/paddle/operators/elementwise_add_op.cu +++ b/paddle/operators/elementwise_add_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/elementwise_add_op.h" diff --git a/paddle/operators/elementwise_add_op.h b/paddle/operators/elementwise_add_op.h index 069bdaf0ab7469b0a814ca5f68b444b9ce4904f1..59abbb57d1dcfbef6ead70e4afa9a3816d60d9b5 100644 --- a/paddle/operators/elementwise_add_op.h +++ b/paddle/operators/elementwise_add_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/elementwise_div_op.cc b/paddle/operators/elementwise_div_op.cc index 78eae53f53593e5fd3a20daad09098190b4b59f6..1fa960866fa2066a351ef2e65a3c77cf8b6595f7 100644 --- a/paddle/operators/elementwise_div_op.cc +++ b/paddle/operators/elementwise_div_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/elementwise_div_op.h" #include "paddle/operators/elementwise_op.h" diff --git a/paddle/operators/elementwise_div_op.cu b/paddle/operators/elementwise_div_op.cu index 502c52893667e246a19bb04c8bf3ed3df3265f2d..a0372123d6ffe7e3a90727ddd37a787c1030e0bd 100644 --- a/paddle/operators/elementwise_div_op.cu +++ b/paddle/operators/elementwise_div_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/elementwise_div_op.h" diff --git a/paddle/operators/elementwise_div_op.h b/paddle/operators/elementwise_div_op.h index d91313db4225d8fe051856345367a15867bdf215..875abd313ffc8fdf910d461922ff41f65ef276e7 100644 --- a/paddle/operators/elementwise_div_op.h +++ b/paddle/operators/elementwise_div_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/elementwise_mul_op.cc b/paddle/operators/elementwise_mul_op.cc index f0a61b8b081f5675b1684022e61876ed4d1d4aca..a6d11736194cb79bdc247c721acf8bda9c81dbe5 100644 --- a/paddle/operators/elementwise_mul_op.cc +++ b/paddle/operators/elementwise_mul_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/elementwise_mul_op.h" #include "paddle/operators/elementwise_op.h" diff --git a/paddle/operators/elementwise_mul_op.cu b/paddle/operators/elementwise_mul_op.cu index 089451b3e1288b3adc689a3c7d9fea2bc5243407..f73e8afda960a89aff8568eab66b0f120db2e342 100644 --- a/paddle/operators/elementwise_mul_op.cu +++ b/paddle/operators/elementwise_mul_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/elementwise_mul_op.h" diff --git a/paddle/operators/elementwise_mul_op.h b/paddle/operators/elementwise_mul_op.h index 16fa5ec4b3a369805acb401bae5407072101af8d..3ee50207c07fa2b7ccf2c002903a4f055dbfb352 100644 --- a/paddle/operators/elementwise_mul_op.h +++ b/paddle/operators/elementwise_mul_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/operators/elementwise_op_function.h" diff --git a/paddle/operators/elementwise_op_function.h b/paddle/operators/elementwise_op_function.h index 9edfacd6dfb506e12a0d82772d8de301bb8506e2..560247cb108dce5432bfe66556b9e675a3accc27 100644 --- a/paddle/operators/elementwise_op_function.h +++ b/paddle/operators/elementwise_op_function.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" diff --git a/paddle/operators/elementwise_sub_op.cc b/paddle/operators/elementwise_sub_op.cc index 1c4168621c343f14d603b18dd6c518052f83ad0d..2a8d0845b1800277a7d3cd6ff6c5c984e92197ee 100644 --- a/paddle/operators/elementwise_sub_op.cc +++ b/paddle/operators/elementwise_sub_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/elementwise_sub_op.h" #include "paddle/operators/elementwise_op.h" diff --git a/paddle/operators/elementwise_sub_op.cu b/paddle/operators/elementwise_sub_op.cu index 0b2f0f7d4d98f1336087f9fc3fc485ed8d805b5f..7a2516ef6a6d5739e9f122455d289cbfeaaf2549 100644 --- a/paddle/operators/elementwise_sub_op.cu +++ b/paddle/operators/elementwise_sub_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/elementwise_sub_op.h" diff --git a/paddle/operators/elementwise_sub_op.h b/paddle/operators/elementwise_sub_op.h index 731a30c5e30d3f9bbdbabd62e5d9a77559500b06..66edf8672d13086f883f0a2ad7ef5802317cc79a 100644 --- a/paddle/operators/elementwise_sub_op.h +++ b/paddle/operators/elementwise_sub_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/operators/elementwise_op_function.h" diff --git a/paddle/operators/expand_op.cu b/paddle/operators/expand_op.cu index 99ee584d0859f9bf688899cc9b346d221415518c..84e8fa567b80599d9687fed516eac6fbb308b24a 100644 --- a/paddle/operators/expand_op.cu +++ b/paddle/operators/expand_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU diff --git a/paddle/operators/expand_op.h b/paddle/operators/expand_op.h index 14ef8b0912860f7ec39535997c39d6d4c4970650..1d9012cd4a4c6ad596e7d434b5c4ecea1ddcde87 100644 --- a/paddle/operators/expand_op.h +++ b/paddle/operators/expand_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - You may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/feed_op.cc b/paddle/operators/feed_op.cc index 66b8080c26192a74cc27bce9a00107de89822717..48da52c3b68879a1da8550a5448090f9f1e715d3 100644 --- a/paddle/operators/feed_op.cc +++ b/paddle/operators/feed_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/feed_fetch_type.h" #include "paddle/framework/op_registry.h" @@ -25,7 +25,7 @@ class FeedOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { auto feed_var_name = Input("X"); auto *feed_var = scope.FindVar(feed_var_name); @@ -47,7 +47,12 @@ class FeedOp : public framework::OperatorBase { auto &feed_list = feed_var->Get(); auto &feed_item = feed_list.at(static_cast(col)); auto *out_item = out_var->GetMutable(); - framework::CopyFrom(feed_item, dev_ctx.GetPlace(), dev_ctx, out_item); + + // get device context from pool + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + + framework::CopyFrom(feed_item, place, dev_ctx, out_item); out_item->set_lod(feed_item.lod()); } }; diff --git a/paddle/operators/fetch_op.cc b/paddle/operators/fetch_op.cc index 616590f2001be3bea4e50c0c1755a80eb20e9348..387d1e0a747f71d85826b52d140c2838112227f6 100644 --- a/paddle/operators/fetch_op.cc +++ b/paddle/operators/fetch_op.cc @@ -1,19 +1,20 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/feed_fetch_type.h" #include "paddle/framework/op_registry.h" +#include "paddle/platform/device_context.h" namespace paddle { namespace operators { @@ -26,7 +27,7 @@ class FetchOp : public framework::OperatorBase { : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { auto fetch_var_name = Input("X"); auto *fetch_var = scope.FindVar(fetch_var_name); PADDLE_ENFORCE(fetch_var != nullptr, @@ -51,6 +52,9 @@ class FetchOp : public framework::OperatorBase { // FIXME(yuyang18): Should we assume the fetch operator always generate // CPU outputs? + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + CopyFrom(src_item, platform::CPUPlace(), dev_ctx, &dst_item); dev_ctx.Wait(); dst_item.set_lod(src_item.lod()); diff --git a/paddle/operators/fill_constant_batch_size_like_op.cc b/paddle/operators/fill_constant_batch_size_like_op.cc index 7a7e280e78309582a627087bdbdfea358c37b9eb..852ecdfe45e7f4737a505c1f722d25457ad6ad32 100644 --- a/paddle/operators/fill_constant_batch_size_like_op.cc +++ b/paddle/operators/fill_constant_batch_size_like_op.cc @@ -49,7 +49,7 @@ class FillConstantBatchSizeLikeOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( static_cast(ctx.Attr("dtype")), diff --git a/paddle/operators/fill_constant_batch_size_like_op.cu.cc b/paddle/operators/fill_constant_batch_size_like_op.cu.cc index 2e0e15f36bb2e0ffd33dc6d1d25965d0cbe33186..608f4b91623e4ddf0240c37be7a8e56117dd40f2 100644 --- a/paddle/operators/fill_constant_batch_size_like_op.cu.cc +++ b/paddle/operators/fill_constant_batch_size_like_op.cu.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/fill_constant_batch_size_like_op.h" #include "paddle/framework/op_registry.h" diff --git a/paddle/operators/fill_constant_op.cc b/paddle/operators/fill_constant_op.cc index 3489079eaa3e8f04e27941de942ce9e14f8434f9..dcd43a30c86b62d79f52ac640f14b295a062146c 100644 --- a/paddle/operators/fill_constant_op.cc +++ b/paddle/operators/fill_constant_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/framework/data_type.h" #include "paddle/framework/op_registry.h" #include "paddle/operators/math/math_function.h" +#include "paddle/platform/device_context.h" namespace paddle { namespace operators { @@ -33,7 +34,7 @@ class FillConstantOp : public framework::OperatorBase { public: using framework::OperatorBase::OperatorBase; void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &dev_place) const override { auto data_type = static_cast(Attr("dtype")); auto value = Attr("value"); @@ -45,8 +46,11 @@ class FillConstantOp : public framework::OperatorBase { auto cpu = platform::CPUPlace(); out.mutable_data(cpu, framework::ToTypeIndex(data_type)); } else { - out.mutable_data(dev_ctx.GetPlace(), framework::ToTypeIndex(data_type)); + out.mutable_data(dev_place, framework::ToTypeIndex(data_type)); } + + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(dev_place); math::set_constant(dev_ctx, &out, value); } }; diff --git a/paddle/operators/fill_op.cc b/paddle/operators/fill_op.cc index f0c6cff8e34c9038c2321c0326bd2ef728d665ba..084ba1db62de0a6bf6829f8e9f4c274fb777e879 100644 --- a/paddle/operators/fill_op.cc +++ b/paddle/operators/fill_op.cc @@ -1,20 +1,21 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/data_type.h" #include "paddle/framework/op_registry.h" #include "paddle/operators/detail/safe_ref.h" +#include "paddle/platform/device_context.h" namespace paddle { namespace operators { @@ -42,7 +43,7 @@ class FillOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { auto &out = detail::Ref(detail::Ref(scope.FindVar(Output("Out")), "Cannot find variable %s", Output("Out")) @@ -51,12 +52,11 @@ class FillOp : public framework::OperatorBase { auto dtype = static_cast(Attr("dtype")); platform::CPUPlace cpu; auto force_cpu = Attr("force_cpu"); - out.mutable_data(force_cpu ? cpu : dev_ctx.GetPlace(), - framework::ToTypeIndex(dtype)); + out.mutable_data(force_cpu ? cpu : place, framework::ToTypeIndex(dtype)); framework::LoDTensor tensor; - if (force_cpu || platform::is_cpu_place(dev_ctx.GetPlace())) { + if (force_cpu || platform::is_cpu_place(place)) { tensor.ShareDataWith(out); } else { // Always make tensor in CPU memory. @@ -67,9 +67,12 @@ class FillOp : public framework::OperatorBase { framework::VisitDataType( dtype, FillOpVisitor(&tensor, Attr>("value"))); - if (!force_cpu && platform::is_gpu_place(dev_ctx.GetPlace())) { + if (!force_cpu && platform::is_gpu_place(place)) { // Copy tensor to out - framework::CopyFrom(tensor, dev_ctx.GetPlace(), dev_ctx, &out); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + framework::CopyFrom(tensor, place, dev_ctx, &out); } } }; diff --git a/paddle/operators/fill_zeros_like_op.cu.cc b/paddle/operators/fill_zeros_like_op.cu.cc index 9f412306bb5f08497990f0e0385f695d838c2400..b7048e8f5857e646e16d5017593f5d3c6e79ea7e 100644 --- a/paddle/operators/fill_zeros_like_op.cu.cc +++ b/paddle/operators/fill_zeros_like_op.cu.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/fill_zeros_like_op.h" #include "paddle/framework/op_registry.h" diff --git a/paddle/operators/gather.cu.h b/paddle/operators/gather.cu.h index c806aa5f05ad214abb3484935d82b67880a1db7a..9840c066f053e5e1cd1c756d4dd938eace1a5eb4 100644 --- a/paddle/operators/gather.cu.h +++ b/paddle/operators/gather.cu.h @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/tensor.h" diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 47af222314c40a2c77ee422ccc70602078b3f1fb..45e9d8df702403e66f9100e4edaf5c17470eb20d 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -40,7 +40,7 @@ class GatherOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), @@ -57,7 +57,7 @@ class GatherGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), diff --git a/paddle/operators/gather_op.cu b/paddle/operators/gather_op.cu index b37f0576e276b2aa995f01de635ec153a0db36aa..eec2415e1de2434de0a920567863d421d2d3032d 100644 --- a/paddle/operators/gather_op.cu +++ b/paddle/operators/gather_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "gather.cu.h" #include "paddle/framework/eigen.h" diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 5eab1d5f4ee067db602ab81a9df1854bcfaf78a8..9ed493a7d027e1bd3e8c5fca376678fd5fcf14f1 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -1,13 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include #include "paddle/framework/op_registry.h" @@ -57,7 +60,7 @@ class GaussianRandomOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( static_cast(ctx.Attr("dtype")), diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu index ffce6f713816abe7d1f207f141a1b0933574e2ff..8a70db17e17ebf7d5bad1e1ee6a2acdff1b85a09 100644 --- a/paddle/operators/gaussian_random_op.cu +++ b/paddle/operators/gaussian_random_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ -#include -#include +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include #include #include "paddle/framework/op_registry.h" diff --git a/paddle/operators/gru_op.cc b/paddle/operators/gru_op.cc index 8e7000654c62b50a3ca130e2ffed4a0f5880de91..76f2adefede3b4bc4035f86f8f8663eed29343ae 100644 --- a/paddle/operators/gru_op.cc +++ b/paddle/operators/gru_op.cc @@ -1,13 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/gru_op.h" diff --git a/paddle/operators/gru_op.cu.cc b/paddle/operators/gru_op.cu.cc index 458630ca6187ec89638046d8eea63c31eca518f2..9cb0cc42d5589792aae6d99cec807aac6e4991b6 100644 --- a/paddle/operators/gru_op.cu.cc +++ b/paddle/operators/gru_op.cu.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/gru_op.h" diff --git a/paddle/operators/gru_op.h b/paddle/operators/gru_op.h index 6d02dff578846904beeb58c5161d27c7c2ed5d70..b1957fb9ce6add8628cb206abf2c569d3f615c85 100644 --- a/paddle/operators/gru_op.h +++ b/paddle/operators/gru_op.h @@ -1,19 +1,20 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once +#include "paddle/operators/math/detail/activation_functions.h" #include "paddle/operators/math/gru_compute.h" #include "paddle/operators/math/math_function.h" #include "paddle/operators/math/sequence2batch.h" @@ -70,7 +71,7 @@ class GRUKernel : public framework::OpKernel { } int frame_size = hidden_dims[1]; - math::hl_gru_value gru_value; + math::GRUMetaValue gru_value; gru_value.gate_weight = const_cast(weight_data); gru_value.state_weight = const_cast(weight_data + 2 * frame_size * frame_size); @@ -89,6 +90,10 @@ class GRUKernel : public framework::OpKernel { } auto batch_starts = batch_gate->lod()[0]; size_t num_batch = batch_starts.size() - 1; + auto active_node = math::detail::GetActivationType( + context.Attr("activation")); + auto active_gate = math::detail::GetActivationType( + context.Attr("gate_activation")); for (size_t n = 0; n < num_batch; n++) { int bstart = static_cast(batch_starts[n]); int bend = static_cast(batch_starts[n + 1]); @@ -101,9 +106,8 @@ class GRUKernel : public framework::OpKernel { gru_value.gate_value = gate_t.data(); gru_value.reset_output_value = reset_hidden_prev_t.data(); math::GRUUnitFunctor::compute( - dev_ctx, gru_value, frame_size, cur_batch_size, - math::ActiveType(context.Attr("activation")), - math::ActiveType(context.Attr("gate_activation"))); + dev_ctx, gru_value, frame_size, cur_batch_size, active_node, + active_gate); gru_value.prev_out_value = gru_value.output_value; } @@ -170,12 +174,12 @@ class GRUGradKernel : public framework::OpKernel { batch_hidden_grad.set_lod(batch_hidden->lod()); to_batch(dev_ctx, *hidden_grad, batch_hidden_grad, false, is_reverse); - math::hl_gru_value gru_value; + math::GRUMetaValue gru_value; gru_value.gate_weight = const_cast(weight_data); gru_value.state_weight = const_cast(weight_data + 2 * frame_size * frame_size); - math::hl_gru_grad gru_grad; + math::GRUMetaGrad gru_grad; if (weight_grad) { gru_grad.gate_weight_grad = weight_grad->mutable_data(context.GetPlace()); @@ -189,6 +193,10 @@ class GRUGradKernel : public framework::OpKernel { auto batch_starts = batch_hidden_grad.lod()[0]; size_t num_batch = batch_starts.size() - 1; + auto active_node = math::detail::GetActivationType( + context.Attr("activation")); + auto active_gate = math::detail::GetActivationType( + context.Attr("gate_activation")); for (int n = static_cast(num_batch) - 1; n >= 0; n--) { int bstart = static_cast(batch_starts[n]); int bend = static_cast(batch_starts[n + 1]); @@ -219,9 +227,8 @@ class GRUGradKernel : public framework::OpKernel { } math::GRUUnitGradFunctor::compute( - dev_ctx, gru_value, gru_grad, frame_size, cur_batch_size, - math::ActiveType(context.Attr("activation")), - math::ActiveType(context.Attr("gate_activation"))); + dev_ctx, gru_value, gru_grad, frame_size, cur_batch_size, active_node, + active_gate); } if (input_grad) { input_grad->mutable_data(context.GetPlace()); diff --git a/paddle/operators/gru_unit_op.cc b/paddle/operators/gru_unit_op.cc index 7e5f674a8c020d931fd375ff5994da18052aa8fa..c354293be7720abd7d96b1c4311b32049a16730c 100644 --- a/paddle/operators/gru_unit_op.cc +++ b/paddle/operators/gru_unit_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/gru_unit_op.h" diff --git a/paddle/operators/gru_unit_op.cu b/paddle/operators/gru_unit_op.cu index 7c752db494b59c3ec2af093332777ce6655fb477..95c8c23dadadf0e053012c86d44346ee31565cfc 100644 --- a/paddle/operators/gru_unit_op.cu +++ b/paddle/operators/gru_unit_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/gru_unit_op.h" diff --git a/paddle/operators/gru_unit_op.h b/paddle/operators/gru_unit_op.h index 8fe60c750da0a42089dc38190d2dda3d08e5ba06..a77be46718b766d9a0a8b8fb4cf2316b44687db8 100644 --- a/paddle/operators/gru_unit_op.h +++ b/paddle/operators/gru_unit_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/hinge_loss_op.cu b/paddle/operators/hinge_loss_op.cu index 31a5bde292ebcab899ad05a813c685963dd5bc25..b9cfbc50c49c6cc902cb3667200c12c74fb5d13d 100644 --- a/paddle/operators/hinge_loss_op.cu +++ b/paddle/operators/hinge_loss_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/hinge_loss_op.h" diff --git a/paddle/operators/huber_loss_op.cu b/paddle/operators/huber_loss_op.cu index d49a4d9d4236c402f2559c5a0a5de097c2edc61f..ccc83a16ba271f0aa879c5c075dce1932dd40494 100644 --- a/paddle/operators/huber_loss_op.cu +++ b/paddle/operators/huber_loss_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/huber_loss_op.h" diff --git a/paddle/operators/increment_op.cc b/paddle/operators/increment_op.cc index 789c92102d63355a80c3330f2107c731206397f4..e0b80cc4e74429dee1b9a25e41b116970ad4de2a 100644 --- a/paddle/operators/increment_op.cc +++ b/paddle/operators/increment_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/op_registry.h" @@ -52,7 +52,7 @@ class IncrementOp : public framework::OperatorBase { : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { auto &x = scope.FindVar(Input("X"))->Get(); auto &out = *scope.FindVar(Output("Out"))->GetMutable(); diff --git a/paddle/operators/is_empty_op.cc b/paddle/operators/is_empty_op.cc index 3616a0414f9e889376f8ba46e7567d7171eff3bf..492ae48845aa5aa123989e62d07f5ae899af6193 100644 --- a/paddle/operators/is_empty_op.cc +++ b/paddle/operators/is_empty_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" @@ -29,7 +29,7 @@ class IsEmptyOp : public framework::OperatorBase { : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { // get input auto *var = scope.FindVar(Input(kInput)); PADDLE_ENFORCE_NOT_NULL(var); diff --git a/paddle/operators/l1_norm_op.cc b/paddle/operators/l1_norm_op.cc index 3d1da79763102c876de3b45e56438da909b00394..1a5d6e19263325821dd220d8a31c0e34600b8220 100644 --- a/paddle/operators/l1_norm_op.cc +++ b/paddle/operators/l1_norm_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/l1_norm_op.h" diff --git a/paddle/operators/l1_norm_op.cu b/paddle/operators/l1_norm_op.cu index fd725f86f6c98c5aff844546361d8599ea3527ab..7ecc774670a8480da46ac688f3635e04b1ab7c1f 100644 --- a/paddle/operators/l1_norm_op.cu +++ b/paddle/operators/l1_norm_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/l1_norm_op.h" diff --git a/paddle/operators/l1_norm_op.h b/paddle/operators/l1_norm_op.h index ae3878f2b7b079027a9e9145cefa9eae6b22ffbc..086d42705dceaf0cbd35ce8e5115156a76a0b6e8 100644 --- a/paddle/operators/l1_norm_op.h +++ b/paddle/operators/l1_norm_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" diff --git a/paddle/operators/linear_chain_crf_op.cc b/paddle/operators/linear_chain_crf_op.cc index ad15e8ebd2b323929a4448e98a18c5cad6f5ed12..666207ea07628ca5f2a8313fa3f5febda140a294 100644 --- a/paddle/operators/linear_chain_crf_op.cc +++ b/paddle/operators/linear_chain_crf_op.cc @@ -183,7 +183,7 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { protected: // Explicitly set that the data type of computation kernel of linear_chain_crf // is determined by its input "Emission". - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Emission")->type()), @@ -242,7 +242,7 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel { protected: // Explicitly set that the data type of output of the linear_chain_crf_grad // operator is determined by its input: gradients of LogLikelihood. - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType( diff --git a/paddle/operators/linear_chain_crf_op.cu b/paddle/operators/linear_chain_crf_op.cu index 3b105ec3414b5d63946331319d0f47a38e7908cc..da612510b4d45d8eefabe7de303e9fd0132c5f77 100644 --- a/paddle/operators/linear_chain_crf_op.cu +++ b/paddle/operators/linear_chain_crf_op.cu @@ -1,10 +1,10 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/paddle/operators/linear_chain_crf_op.h b/paddle/operators/linear_chain_crf_op.h index 694584e79c3a1e818814a4a2145f52d8db7cf10a..19c6715ec877dea6dcf0babc7373333a4d9eed0f 100644 --- a/paddle/operators/linear_chain_crf_op.h +++ b/paddle/operators/linear_chain_crf_op.h @@ -219,8 +219,8 @@ class LinearChainCRFOpKernel : public framework::OpKernel { // operators runs on GPU device. auto copyTensor = [](const platform::DeviceContext& ctx, const Tensor& src, Tensor* dst) { - dst->mutable_data(platform::GPUPlace()); - framework::CopyFrom(src, platform::GPUPlace(), ctx, dst); + dst->mutable_data(platform::CUDAPlace()); + framework::CopyFrom(src, platform::CUDAPlace(), ctx, dst); }; copyTensor(ctx, emission_exps_src, emission_exps_dst); copyTensor(ctx, transition_exps_src, transition_exps_dst); @@ -433,8 +433,8 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { auto copyTensor = [](const platform::DeviceContext& ctx, const Tensor* src, Tensor* dst) { if (src && dst) { - dst->mutable_data(platform::GPUPlace()); - framework::CopyFrom(*src, platform::GPUPlace(), ctx, dst); + dst->mutable_data(platform::CUDAPlace()); + framework::CopyFrom(*src, platform::CUDAPlace(), ctx, dst); } }; copyTensor(ctx, emission_grad_src, emission_grad_dst); diff --git a/paddle/operators/load_op.cc b/paddle/operators/load_op.cc index 6c51dad27a4d9cd9e48b8591b1f14472c83ceaf1..7f551f101f3b3c097f664cc3e9240c2cee7f6830 100644 --- a/paddle/operators/load_op.cc +++ b/paddle/operators/load_op.cc @@ -1,20 +1,20 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include #include "paddle/framework/op_registry.h" - -#include +#include "paddle/platform/device_context.h" namespace paddle { namespace operators { @@ -26,7 +26,7 @@ class LoadOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { auto filename = Attr("file_path"); std::ifstream fin(filename); PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s for load op", @@ -38,9 +38,11 @@ class LoadOp : public framework::OperatorBase { out_var_name); auto *tensor = out_var->GetMutable(); - framework::DeserializeFromStream(fin, tensor); - auto place = dev_ctx.GetPlace(); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + DeserializeFromStream(fin, tensor, dev_ctx); + if (platform::is_gpu_place(place)) { // copy CPU to GPU framework::LoDTensor cpu_tensor; diff --git a/paddle/operators/lod_array_length_op.cc b/paddle/operators/lod_array_length_op.cc index cc8593810baf83e12368e67ceaeef0631e35c051..d2c52745cfdf8d0fdb168ef2d90e75a515c31015 100644 --- a/paddle/operators/lod_array_length_op.cc +++ b/paddle/operators/lod_array_length_op.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/op_registry.h" @@ -26,7 +26,7 @@ class LoDArrayLengthOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { auto &x = scope.FindVar(Input("X"))->Get(); auto &out = *scope.FindVar(Output("Out"))->GetMutable(); diff --git a/paddle/operators/lod_rank_table_op.cc b/paddle/operators/lod_rank_table_op.cc index 2d67046bfee01d8d148da1c8b705d3ad959a4839..8711dd62c886fdada083d316d6aabc93a050ff82 100644 --- a/paddle/operators/lod_rank_table_op.cc +++ b/paddle/operators/lod_rank_table_op.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/lod_rank_table.h" #include "paddle/framework/op_registry.h" namespace paddle { @@ -24,7 +24,7 @@ class LoDRankTableOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &dev_place) const override { auto x = scope.FindVar(Input("X"))->Get(); auto *out = scope.FindVar(Output("Out"))->GetMutable(); diff --git a/paddle/operators/lod_reset_op.cc b/paddle/operators/lod_reset_op.cc index ccb87258c6b8629cd18d08185bfcc84c247070dd..f3c0badf2a74431b980abd532e51ba3d251524a1 100644 --- a/paddle/operators/lod_reset_op.cc +++ b/paddle/operators/lod_reset_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/lod_reset_op.h" @@ -38,7 +38,7 @@ class LoDResetOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), @@ -97,7 +97,7 @@ class LoDResetGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), diff --git a/paddle/operators/lod_reset_op.cu b/paddle/operators/lod_reset_op.cu index f7c235898096ffb3d6ba039cb3f01d5bc9ef5364..910866ea6330059f8e0b04e036e3b124e920b5c4 100644 --- a/paddle/operators/lod_reset_op.cu +++ b/paddle/operators/lod_reset_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/lod_reset_op.h" diff --git a/paddle/operators/lod_reset_op.h b/paddle/operators/lod_reset_op.h index b86f8b13135fa809ade3b001434eda5d88375c2c..306373fb1fb6f16a0db7f0e836e38fd8c49f7e86 100644 --- a/paddle/operators/lod_reset_op.h +++ b/paddle/operators/lod_reset_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/lod_tensor_to_array_op.cc b/paddle/operators/lod_tensor_to_array_op.cc index 643f8859f3d0d44c0b5be922bd786ab04093df94..8d164b4abc54722a95a176dfe8ed341f8c5125d1 100644 --- a/paddle/operators/lod_tensor_to_array_op.cc +++ b/paddle/operators/lod_tensor_to_array_op.cc @@ -1,20 +1,21 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/lod_rank_table.h" #include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/op_registry.h" #include "paddle/operators/detail/safe_ref.h" +#include "paddle/platform/device_context.h" namespace paddle { namespace operators { @@ -32,7 +33,7 @@ class LoDTensorToArrayOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { auto &x = detail::Ref(scope.FindVar(Input("X")), "Cannot find input %s", Input("X")) .Get(); @@ -86,6 +87,11 @@ class LoDTensorToArrayOp : public framework::OperatorBase { // out[i][offset: offset+len] = x[each_range.begin: each_range.end] auto slice = out[i].Slice(static_cast(offset), static_cast(offset + len)); + + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + framework::CopyFrom(x.Slice(static_cast(each_range.begin), static_cast(each_range.end)), x.place(), dev_ctx, &slice); diff --git a/paddle/operators/log_loss_op.cu b/paddle/operators/log_loss_op.cu index e87ac7d12a2b730085b4e9a33457612c4eba2655..be283e470052cc3a569be564ab4baa6bc5b75808 100644 --- a/paddle/operators/log_loss_op.cu +++ b/paddle/operators/log_loss_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/log_loss_op.h" diff --git a/paddle/operators/logical_op.cc b/paddle/operators/logical_op.cc index 2bd6c6efae38d6d8d49cc9f3fd97cf316fbbdd0a..7417192479a13ca9537e2d40f9779a3bf5f1eb61 100644 --- a/paddle/operators/logical_op.cc +++ b/paddle/operators/logical_op.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/logical_op.h" #include "paddle/framework/op_registry.h" @@ -99,9 +99,9 @@ class LogicalOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext &ctx) const override { - framework::OpKernelType kt = OperatorWithKernel::GetKernelType(ctx); + framework::OpKernelType kt = OperatorWithKernel::GetActualKernelType(ctx); // LogicalOp kernel's device type is decided by input tensor place kt.place_ = ctx.Input("X")->place(); return kt; diff --git a/paddle/operators/logical_op.cu b/paddle/operators/logical_op.cu index 7fef60e0c9e957f28118e54d23c6043752d2f52f..87f2287b8f11aabe8afe87776eff49295c1ea2ac 100644 --- a/paddle/operators/logical_op.cu +++ b/paddle/operators/logical_op.cu @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/logical_op.h" diff --git a/paddle/operators/logical_op.h b/paddle/operators/logical_op.h index 629388cac81e60c8b84197238018384ffc59a08f..413857685603c7b84e885135d9aadf7cc71a4f72 100644 --- a/paddle/operators/logical_op.h +++ b/paddle/operators/logical_op.h @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include diff --git a/paddle/operators/lookup_table_op.cc b/paddle/operators/lookup_table_op.cc index 0a9defa8c50453abf3eefdcb89126b1349d6d756..6e5cbd6f8cefc965d6c8d24b16eb3bafde55cc49 100644 --- a/paddle/operators/lookup_table_op.cc +++ b/paddle/operators/lookup_table_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/lookup_table_op.h" #include "paddle/framework/var_type_inference.h" @@ -41,7 +41,7 @@ class LookupTableOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("W")->type()), @@ -98,7 +98,7 @@ class LookupTableOpGrad : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("W")->type()), diff --git a/paddle/operators/lookup_table_op.cu b/paddle/operators/lookup_table_op.cu index 9431030a53975acafe9bcb22dc9164492929b07a..261a28da694bf551d8d9e630139680aebc4be51a 100644 --- a/paddle/operators/lookup_table_op.cu +++ b/paddle/operators/lookup_table_op.cu @@ -1,13 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" @@ -101,7 +104,7 @@ class LookupTableGradCUDAKernel : public framework::OpKernel { // copy GPU memory to CPU pinned memory framework::Vector new_rows; new_rows.resize(ids_dim[0]); - auto gpu_place = boost::get(context.GetPlace()); + auto gpu_place = boost::get(context.GetPlace()); memory::Copy(platform::CPUPlace(), new_rows.data(), gpu_place, ids_data, ids_dim[0] * sizeof(int64_t), stream); diff --git a/paddle/operators/lookup_table_op.h b/paddle/operators/lookup_table_op.h index 99b912163b71594340d8917645dff107fd208aea..2fd3335868406455ec01f9ded6bacc7bda5e2a67 100644 --- a/paddle/operators/lookup_table_op.h +++ b/paddle/operators/lookup_table_op.h @@ -1,13 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/lrn_op.cc b/paddle/operators/lrn_op.cc index 3b77b27b72d7079c10695da43a4fcfed9b4c855c..95673ba19e776b3c52eb492d0b14d761b584f807 100644 --- a/paddle/operators/lrn_op.cc +++ b/paddle/operators/lrn_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/lrn_op.h" diff --git a/paddle/operators/lrn_op.cu b/paddle/operators/lrn_op.cu index c6857c2b6d0a9011ef83d115e6edd81bf2f8a0ca..eb9d66a73dfe3e22f1151d73ce5e34f2eda0835e 100644 --- a/paddle/operators/lrn_op.cu +++ b/paddle/operators/lrn_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/lrn_op.h" diff --git a/paddle/operators/lrn_op.h b/paddle/operators/lrn_op.h index 44063d3e036809eb236bbe7c46aa0cce06b46df0..ef3a2883a88ff321fb9a87ddaf31123a3b9ee90a 100644 --- a/paddle/operators/lrn_op.h +++ b/paddle/operators/lrn_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - You may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/lstm_op.cc b/paddle/operators/lstm_op.cc index f82156170e672b5e590ddb8e0e6e8a2a24ea6868..b8fcec0f29b46e838f91ad1ee0fded8e42f27bd5 100644 --- a/paddle/operators/lstm_op.cc +++ b/paddle/operators/lstm_op.cc @@ -92,7 +92,7 @@ class LSTMOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Input")->type()), @@ -260,7 +260,7 @@ class LSTMGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Input")->type()), diff --git a/paddle/operators/lstm_op.cu.cc b/paddle/operators/lstm_op.cu.cc index 48519bed6f7d927b40d02683a7e9f2acfb8b85e5..cfcc1fc92a074c9bfe83e6c32560177edef12ae9 100644 --- a/paddle/operators/lstm_op.cu.cc +++ b/paddle/operators/lstm_op.cu.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/lstm_op.h" diff --git a/paddle/operators/lstm_op.h b/paddle/operators/lstm_op.h index 14abd4bf0a6e73a9c0f000f53a5e1e380f01d1c0..c57ee414dc5b3417549c8ac3a7fd57a9c8f452df 100644 --- a/paddle/operators/lstm_op.h +++ b/paddle/operators/lstm_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include "paddle/framework/op_registry.h" +#include "paddle/operators/math/detail/activation_functions.h" #include "paddle/operators/math/lstm_compute.h" #include "paddle/operators/math/math_function.h" #include "paddle/operators/math/sequence2batch.h" @@ -102,9 +103,12 @@ class LSTMKernel : public framework::OpKernel { auto batch_starts = batch_gate->lod()[0]; size_t num_batch = batch_starts.size() - 1; - auto gate_act = ctx.Attr("gate_activation"); - auto cell_act = ctx.Attr("cell_activation"); - auto cand_act = ctx.Attr("candidate_activation"); + auto gate_act = math::detail::GetActivationType( + ctx.Attr("gate_activation")); + auto cell_act = math::detail::GetActivationType( + ctx.Attr("cell_activation")); + auto cand_act = math::detail::GetActivationType( + ctx.Attr("candidate_activation")); for (size_t n = 0; n < num_batch; n++) { int bstart = static_cast(batch_starts[n]); @@ -264,9 +268,12 @@ class LSTMGradKernel : public framework::OpKernel { batch_gate_g.mutable_data(batch_gate->dims(), ctx.GetPlace()); batch_gate_g.set_lod(batch_gate->lod()); - auto gate_act = ctx.Attr("gate_activation"); - auto cell_act = ctx.Attr("cell_activation"); - auto cand_act = ctx.Attr("candidate_activation"); + auto gate_act = math::detail::GetActivationType( + ctx.Attr("gate_activation")); + auto cell_act = math::detail::GetActivationType( + ctx.Attr("cell_activation")); + auto cand_act = math::detail::GetActivationType( + ctx.Attr("candidate_activation")); auto batch_starts = batch_gate->lod()[0]; size_t num_batch = batch_starts.size() - 1; diff --git a/paddle/operators/lstm_unit_op.cc b/paddle/operators/lstm_unit_op.cc index 34da75c00d336d3f540a9472ee2e6c4b224add09..c2d2c43982580c9724849d68576d42ffa44fc6b4 100644 --- a/paddle/operators/lstm_unit_op.cc +++ b/paddle/operators/lstm_unit_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/lstm_unit_op.h" diff --git a/paddle/operators/lstm_unit_op.cu b/paddle/operators/lstm_unit_op.cu index 291f2c295e78288c01c6575df936ceedceba7ce8..5ee5ddd280f7720c4583053e4e48a5043ab423f4 100644 --- a/paddle/operators/lstm_unit_op.cu +++ b/paddle/operators/lstm_unit_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ /* Acknowledgement: the following code is strongly inspired by https://github.com/caffe2/caffe2/blob/master/caffe2/operators/lstm_unit_op_gpu.cu @@ -98,7 +98,7 @@ class LstmUnitOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); auto* x_tensor = ctx.Input("X"); auto* c_prev_tensor = ctx.Input("C_prev"); @@ -129,7 +129,7 @@ class LstmUnitGradOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); auto x_tensor = ctx.Input("X"); auto c_prev_tensor = ctx.Input("C_prev"); diff --git a/paddle/operators/lstm_unit_op.h b/paddle/operators/lstm_unit_op.h index 61705675d930369ea8d491229caa1b4046f3e16a..fa8d141bcb6ee4bfc9a29e337b7adbc5ecd3ad23 100644 --- a/paddle/operators/lstm_unit_op.h +++ b/paddle/operators/lstm_unit_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ /* Acknowledgement: the following code is strongly inspired by https://github.com/caffe2/caffe2/blob/master/caffe2/operators/lstm_unit_op.h diff --git a/paddle/operators/margin_rank_loss_op.cc b/paddle/operators/margin_rank_loss_op.cc index fddc72aec0aa7fa17ef585388c53da640d3c1837..e0df3077742bc330ce8510bf06b0411148f669d8 100644 --- a/paddle/operators/margin_rank_loss_op.cc +++ b/paddle/operators/margin_rank_loss_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/margin_rank_loss_op.h" diff --git a/paddle/operators/margin_rank_loss_op.cu b/paddle/operators/margin_rank_loss_op.cu index 1c2afccc5b32e22c939a275d8c69ad774d3ebdad..798c3ed182b08b07a779da88924bfc05743c680e 100644 --- a/paddle/operators/margin_rank_loss_op.cu +++ b/paddle/operators/margin_rank_loss_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/margin_rank_loss_op.h" diff --git a/paddle/operators/margin_rank_loss_op.h b/paddle/operators/margin_rank_loss_op.h index 9c1f96cac13f1bdb8c5dfd3e771157d1d1c60e15..7438e881e1c69c9ef1f84b21e6ee0ba093f3378a 100644 --- a/paddle/operators/margin_rank_loss_op.h +++ b/paddle/operators/margin_rank_loss_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt index bf47879f772a3013bd7ce78c6f8a6aefe65298f9..7ebcfb9ab9f30e3b0f13d3646a59d008335b232d 100644 --- a/paddle/operators/math/CMakeLists.txt +++ b/paddle/operators/math/CMakeLists.txt @@ -9,13 +9,14 @@ if(WITH_GPU) nv_library(cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS device_context) nv_library(pooling SRCS pooling.cc pooling.cu DEPS device_context) nv_library(sequence_pooling SRCS sequence_pooling.cc sequence_pooling.cu DEPS device_context math_function) - nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context) + nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context tensor) nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context math_function) - nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context) + nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context tensor) nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions) nv_library(maxouting SRCS maxouting.cc maxouting.cu DEPS device_context) nv_library(unpooling SRCS unpooling.cc unpooling.cu DEPS device_context) nv_library(gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions math_function) + nv_library(cos_sim_functor SRCS cos_sim_functor.cc cos_sim_functor.cu DEPS device_context) else() cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context framework_proto) cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function) @@ -23,13 +24,14 @@ else() cc_library(cross_entropy SRCS cross_entropy.cc DEPS device_context) cc_library(pooling SRCS pooling.cc DEPS device_context) cc_library(sequence_pooling SRCS sequence_pooling.cc DEPS device_context math_function) - cc_library(vol2col SRCS vol2col.cc DEPS device_context) + cc_library(vol2col SRCS vol2col.cc DEPS device_context tensor) cc_library(context_project SRCS context_project.cc DEPS device_context math_function) - cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context) + cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context tensor) cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions) cc_library(maxouting SRCS maxouting.cc DEPS device_context) cc_library(unpooling SRCS unpooling.cc DEPS device_context) cc_library(gru_compute SRCS gru_compute.cc DEPS device_context activation_functions math_function) + cc_library(cos_sim_functor SRCS cos_sim_functor.cc DEPS device_context) endif() cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) diff --git a/paddle/operators/math/cos_sim_functor.cc b/paddle/operators/math/cos_sim_functor.cc new file mode 100644 index 0000000000000000000000000000000000000000..6af9f0fcd967b4e8e9e338c155d5a8ee2866fbfa --- /dev/null +++ b/paddle/operators/math/cos_sim_functor.cc @@ -0,0 +1,48 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/math/cos_sim_functor.h" + +namespace paddle { +namespace operators { +namespace math { + +template +struct CosSimDyFunctor { + void operator()(const platform::CPUDeviceContext& ctx, const T* x_norm, + const T* y_norm, const T* x, const T* y, const T* z, + const T* dz, const size_t rows, const size_t cols, + T* dy) const { + for (size_t row_id = 0; row_id < rows; ++row_id) { + auto xy_norm_prod = x_norm[row_id] * y_norm[0]; + auto dz_data = dz[row_id]; + auto z_data = z[row_id]; + auto* x_data = x + cols * row_id; + auto reciprocal_xy_norm_prod = 1 / xy_norm_prod; + + auto y_norm_square = y_norm[0] * y_norm[0]; + auto reciprocal_y_norm_square = 1 / y_norm_square; + for (size_t i = 0; i < cols; ++i) { + dy[i] += dz_data * (x_data[i] * reciprocal_xy_norm_prod - + z_data * y[i] * reciprocal_y_norm_square); + } + } + } +}; + +template struct CosSimDyFunctor; +template struct CosSimDyFunctor; +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/cos_sim_functor.cu b/paddle/operators/math/cos_sim_functor.cu new file mode 100644 index 0000000000000000000000000000000000000000..6eb0a4ea4c5b86f84c93b97615255adf55e9e042 --- /dev/null +++ b/paddle/operators/math/cos_sim_functor.cu @@ -0,0 +1,64 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/math/cos_sim_functor.h" +#include "paddle/platform/cuda_helper.h" + +namespace paddle { +namespace operators { +namespace math { + +template +__global__ void CosSimDyKernel(const T* x_norm, const T* y_norm, const T* x, + const T* y, const T* z, const T* dz, + const size_t rows, const size_t cols, T* dy) { + int grid_size = blockDim.x * gridDim.x; + T y_norm_data = y_norm[0]; + for (int row_id = blockIdx.x * blockDim.x + threadIdx.x; row_id < rows; + row_id += grid_size) { + T xy_norm_prod = x_norm[row_id] * y_norm_data; + T dz_data = dz[row_id]; + T z_data = z[row_id]; + const T* x_data = x + cols * row_id; + T reciprocal_xy_norm_prod = 1 / xy_norm_prod; + + T y_norm_square = y_norm_data * y_norm_data; + T reciprocal_y_norm_square = 1 / y_norm_square; + for (size_t i = 0; i < cols; ++i) { + T dy_data = dz_data * (x_data[i] * reciprocal_xy_norm_prod - + z_data * y[i] * reciprocal_y_norm_square); + platform::CudaAtomicAdd(dy + i, dy_data); + } + } +} + +template +struct CosSimDyFunctor { + void operator()(const platform::CUDADeviceContext& ctx, const T* x_norm, + const T* y_norm, const T* x, const T* y, const T* z, + const T* dz, const size_t rows, const size_t cols, + T* dy) const { + const int block_size = 512; + dim3 threads(block_size, 1); + dim3 grid(1, (rows + block_size - 1) / block_size); + CosSimDyKernel<<>>( + x_norm, y_norm, x, y, z, dz, rows, cols, dy); + } +}; + +template struct CosSimDyFunctor; +template struct CosSimDyFunctor; +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/cos_sim_functor.h b/paddle/operators/math/cos_sim_functor.h new file mode 100644 index 0000000000000000000000000000000000000000..aae8ab5b7a937c016e8a45e34b22aba7a1df3066 --- /dev/null +++ b/paddle/operators/math/cos_sim_functor.h @@ -0,0 +1,166 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include +#include "paddle/platform/device_context.h" +#include "paddle/platform/hostdevice.h" + +namespace paddle { +namespace operators { +namespace math { + +template +struct CosSimFunctor { + CosSimFunctor(const T* x, const T* y, T* x_norm, T* y_norm, T* z, int cols) + : x_norm_(x_norm), + y_norm_(y_norm), + x_(x), + y_(y), + z_(z), + cols_(static_cast(cols)) {} + + inline HOSTDEVICE void operator()(size_t row_id) const { + auto* x = x_ + cols_ * row_id; + T xx = 0, xy = 0, yy = 0; + if (same_row) { + auto* y = y_ + cols_ * row_id; + T tep_x, tep_y; + for (size_t i = 0; i < cols_; ++i) { + tep_x = x[i]; + tep_y = y[i]; + xx += tep_x * tep_x; + yy += tep_y * tep_y; + xy += tep_x * tep_y; + } + xx = sqrt(xx); + yy = sqrt(yy); + y_norm_[row_id] = yy; + x_norm_[row_id] = xx; + z_[row_id] = xy / (xx * yy); + } else { // This can be wrote in a better way. + T tep_x, tep_y; + for (size_t i = 0; i < cols_; ++i) { + tep_x = x[i]; + tep_y = y_[i]; + xx += tep_x * tep_x; + yy += tep_y * tep_y; + xy += tep_x * tep_y; + } + xx = sqrt(xx); + yy = sqrt(yy); + if (row_id == 0) y_norm_[0] = yy; + x_norm_[row_id] = xx; + z_[row_id] = xy / (xx * yy); + } + } + + T* x_norm_; + T* y_norm_; + const T* x_; + const T* y_; + T* z_; + const size_t cols_; +}; + +template +struct CosSimGradFunctor { + CosSimGradFunctor(const T* x_norm, const T* y_norm, const T* x, const T* y, + const T* z, const T* dz, T* dx, int cols) + : x_norm_(x_norm), + y_norm_(y_norm), + x_(x), + y_(y), + z_(z), + dz_(dz), + dx_(dx), + cols_(static_cast(cols)) {} + + inline HOSTDEVICE void operator()(size_t row_id) const { + auto x_norm_square = x_norm_[row_id] * x_norm_[row_id]; + auto xy_norm_prod = x_norm_[row_id] * y_norm_[row_id]; + auto dz = dz_[row_id]; + auto z = z_[row_id]; + + auto* dx = dx_ + cols_ * row_id; + auto* x = x_ + cols_ * row_id; + auto* y = y_ + cols_ * row_id; + + auto reciprocal_xy_norm_prod = 1 / xy_norm_prod; + auto reciprocal_x_norm_square = 1 / x_norm_square; + for (size_t i = 0; i < cols_; ++i) { + dx[i] = dz * (y[i] * reciprocal_xy_norm_prod - + z * x[i] * reciprocal_x_norm_square); + } + } + + const T* x_norm_; + const T* y_norm_; + const T* x_; + const T* y_; + const T* z_; + const T* dz_; + T* dx_; + const size_t cols_; +}; + +template +struct CosSimDxFunctor { + CosSimDxFunctor(const T* x_norm, const T* y_norm, const T* x, const T* y, + const T* z, const T* dz, T* dx, int cols) + : x_norm_(x_norm), + y_norm_(y_norm), + x_(x), + y_(y), + z_(z), + dz_(dz), + dx_(dx), + cols_(static_cast(cols)) {} + + inline HOSTDEVICE void operator()(size_t row_id) const { + auto xy_norm_prod = x_norm_[row_id] * y_norm_[0]; + auto dz = dz_[row_id]; + auto z = z_[row_id]; + auto* x = x_ + cols_ * row_id; + auto reciprocal_xy_norm_prod = 1 / xy_norm_prod; + auto x_norm_square = x_norm_[row_id] * x_norm_[row_id]; + auto* dx = dx_ + cols_ * row_id; + auto reciprocal_x_norm_square = 1 / x_norm_square; + + for (size_t i = 0; i < cols_; ++i) { + dx[i] = dz * (y_[i] * reciprocal_xy_norm_prod - + z * x[i] * reciprocal_x_norm_square); + } + } + const T* x_norm_; + const T* y_norm_; + const T* x_; + const T* y_; + const T* z_; + const T* dz_; + T* dx_; + const size_t cols_; +}; + +template +struct CosSimDyFunctor { + void operator()(const DeviceContext& ctx, const T* x_norm, const T* y_norm, + const T* x, const T* y, const T* z, const T* dz, + const size_t rows, const size_t cols, T* dy) const; +}; + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/cross_entropy.cc b/paddle/operators/math/cross_entropy.cc index 6011a196d446854877e162019f6745deb501ee9d..d9cb016fb440b6b2fe1d222812215feb5970dc4f 100644 --- a/paddle/operators/math/cross_entropy.cc +++ b/paddle/operators/math/cross_entropy.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/math/cross_entropy.h" diff --git a/paddle/operators/math/cross_entropy.cu b/paddle/operators/math/cross_entropy.cu index 2132d49c937a85afeed0e0cee0a74a7e30c6a3ca..16c9e7b28ec8d453492455c8d620ba9edf130a07 100644 --- a/paddle/operators/math/cross_entropy.cu +++ b/paddle/operators/math/cross_entropy.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/math/cross_entropy.h" diff --git a/paddle/operators/math/cross_entropy.h b/paddle/operators/math/cross_entropy.h index 677adb5adaf4041fe7acfd29be354073535fd5fc..b3b6d767a8b8f59e3c75e72ac6c98653a8e1c3a4 100644 --- a/paddle/operators/math/cross_entropy.h +++ b/paddle/operators/math/cross_entropy.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" diff --git a/paddle/operators/math/detail/activation_functions.h b/paddle/operators/math/detail/activation_functions.h index a20c35d1d9dc4a3a6fae92023fd1aae787a716ec..585a0123437a39c2b610306b18fe0a970c0ed072 100644 --- a/paddle/operators/math/detail/activation_functions.h +++ b/paddle/operators/math/detail/activation_functions.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include +#include "paddle/platform/enforce.h" #include "paddle/platform/hostdevice.h" #ifdef __AVX__ @@ -29,6 +30,26 @@ namespace detail { #define SIGMOID_THRESHOLD_MAX 13.0 #define EXP_MAX_INPUT 40.0 +enum ActivationType { + kSigmoid, + kReLU, + kTanh, + kIdentity, +}; + +inline ActivationType GetActivationType(const std::string &type) { + if (type == "sigmoid") { + return ActivationType::kSigmoid; + } else if (type == "relu") { + return ActivationType::kReLU; + } else if (type == "tanh") { + return ActivationType::kTanh; + } else if (type == "identity" || type == "") { + return ActivationType::kIdentity; + } + PADDLE_THROW("Not support type %s.", type); +} + namespace forward { template diff --git a/paddle/operators/math/detail/gru_cpu_kernel.h b/paddle/operators/math/detail/gru_cpu_kernel.h index 4c67dec9cbeb48f400f79f5ed7ba3c939fa2540c..a61b232f4275d93cae1d9a71d49a779216c3555b 100644 --- a/paddle/operators/math/detail/gru_cpu_kernel.h +++ b/paddle/operators/math/detail/gru_cpu_kernel.h @@ -28,7 +28,7 @@ template void hl_naive_gru_forward_reset_output(OpResetOutput op_reset_output, T *gate_value, T *reset_output_value, T *prev_output_value, int frame_size, - activation_mode_t active_gate) { + ActivationType active_gate) { T r_value_update_gate; T r_value_reset_gate; T r_value_reset_output; @@ -56,7 +56,7 @@ template void hl_naive_gru_forward_final_output(OpFinalOutput op_final_output, T *gate_value, T *prev_output_value, T *output_value, int frame_size, - activation_mode_t active_node) { + ActivationType active_node) { T r_value_update_gate; T r_value_frame_state; T r_prev_out = 0; @@ -83,7 +83,7 @@ template void hl_avx_gru_forward_reset_output(OpResetOutput op_reset_output, T *gate_value, T *reset_output_value, T *prev_output_value, int frame_size, - activation_mode_t active_gate) { + ActivationType active_gate) { #ifdef __AVX__ __m256 r_value_update_gate; __m256 r_value_reset_gate; @@ -113,7 +113,7 @@ template void hl_avx_gru_forward_final_output(OpFinalOutput op_final_output, T *gate_value, T *prev_output_value, T *output_value, int frame_size, - activation_mode_t active_node) { + ActivationType active_node) { #ifdef __AVX__ __m256 r_value_update_gate; __m256 r_value_frame_state; @@ -140,9 +140,8 @@ void hl_avx_gru_forward_final_output(OpFinalOutput op_final_output, template inline void forward_reset_output(OpResetOutput op_reset_output, - hl_gru_value value, int frame_size, - int batch_size, - activation_mode_t active_gate) { + GRUMetaValue value, int frame_size, + int batch_size, ActivationType active_gate) { for (int b = 0; b < batch_size; b++) { if (OpResetOutput::avx && !(frame_size & (8 - 1)) && (sizeof(T) == 4)) { hl_avx_gru_forward_reset_output( @@ -164,9 +163,8 @@ inline void forward_reset_output(OpResetOutput op_reset_output, template inline void forward_final_output(OpFinalOutput op_final_output, - hl_gru_value value, int frame_size, - int batch_size, - activation_mode_t active_node) { + GRUMetaValue value, int frame_size, + int batch_size, ActivationType active_node) { for (int b = 0; b < batch_size; b++) { if (OpFinalOutput::avx && !(frame_size & (8 - 1)) && (sizeof(T) == 4)) { hl_avx_gru_forward_final_output(op_final_output, value.gate_value, @@ -191,7 +189,7 @@ void hl_naive_gru_backward_state_grad(OpStateGrad op_state_grad, T *gate_value, T *gate_grad, T *prev_out_value, T *prev_out_grad, T *output_grad, int frame_size, - activation_mode_t active_node) { + ActivationType active_node) { T r_update_gate_value; T r_update_gate_grad; T r_frame_state_value; @@ -232,7 +230,7 @@ void hl_naive_gru_backward_reset_grad(OpResetGrad op_reset_grad, T *gate_value, T *gate_grad, T *prev_out_value, T *prev_out_grad, T *reset_output_grad, int frame_size, - activation_mode_t active_gate) { + ActivationType active_gate) { T r_update_gate_value; T r_update_gate_grad; T r_reset_gate_value; @@ -277,7 +275,7 @@ void hl_avx_gru_backward_state_grad(OpStateGrad op_state_grad, T *gate_value, T *gate_grad, T *prev_out_value, T *prev_out_grad, T *output_grad, int frame_size, - activation_mode_t active_node) { + ActivationType active_node) { #ifdef __AVX__ __m256 r_update_gate_value; __m256 r_update_gate_grad; @@ -320,7 +318,7 @@ void hl_avx_gru_backward_reset_grad(OpResetGrad op_reset_grad, T *gate_value, T *gate_grad, T *prev_out_value, T *prev_out_grad, T *reset_output_grad, int frame_size, - activation_mode_t active_gate) { + ActivationType active_gate) { #ifdef __AVX__ __m256 r_update_gate_value; __m256 r_update_gate_grad; @@ -364,9 +362,9 @@ void hl_avx_gru_backward_reset_grad(OpResetGrad op_reset_grad, T *gate_value, template inline void backward_state_grad(OpStateGrad op_state_grad, - hl_gru_value value, hl_gru_grad grad, + GRUMetaValue value, GRUMetaGrad grad, int frame_size, int batch_size, - activation_mode_t active_node) { + ActivationType active_node) { for (int b = 0; b < batch_size; b++) { if (OpStateGrad::avx && !(frame_size & (8 - 1)) && (sizeof(T) == 4)) { hl_avx_gru_backward_state_grad( @@ -393,9 +391,9 @@ inline void backward_state_grad(OpStateGrad op_state_grad, template inline void backward_reset_grad(OpResetGrad op_reset_grad, - hl_gru_value value, hl_gru_grad grad, + GRUMetaValue value, GRUMetaGrad grad, int frame_size, int batch_size, - activation_mode_t active_gate) { + ActivationType active_gate) { for (int b = 0; b < batch_size; b++) { if (OpResetGrad::avx && !(frame_size & (8 - 1)) && (sizeof(T) == 4)) { hl_avx_gru_backward_reset_grad( diff --git a/paddle/operators/math/detail/gru_gpu_kernel.h b/paddle/operators/math/detail/gru_gpu_kernel.h index d2edcb7f258b387530799b967fc0fff61acc5b83..1783d46096858c874b27ce75760342082835b180 100644 --- a/paddle/operators/math/detail/gru_gpu_kernel.h +++ b/paddle/operators/math/detail/gru_gpu_kernel.h @@ -19,8 +19,6 @@ limitations under the License. */ #include "paddle/platform/cuda_helper.h" #include "paddle/platform/device_context.h" -#include - namespace paddle { namespace operators { namespace math { @@ -35,7 +33,7 @@ __global__ void KeGruForwardResetOutput(OpResetOutput op_reset_output, T *gate_value, T *reset_output_value, T *prev_output_value, int frame_size, int batch_size, - activation_mode_t active_gate) { + ActivationType active_gate) { const int frame_idx = blockIdx.x * blockDim.x + threadIdx.x; if (frame_idx >= frame_size) return; @@ -74,7 +72,7 @@ __global__ void KeGruForwardFinalOutput(OpFinalOutput op_final_output, T *gate_value, T *prev_output_value, T *output_value, int frame_size, int batch_size, - activation_mode_t active_node) { + ActivationType active_node) { const int frame_idx = blockIdx.x * blockDim.x + threadIdx.x; if (frame_idx >= frame_size) return; int batch_idx = 0; @@ -111,7 +109,7 @@ __global__ void KeGruBackwardStateGrad(OpStateGrad op_state_grad, T *gate_value, T *gate_grad, T *prev_out_value, T *prev_out_grad, T *output_grad, int frame_size, int batch_size, - activation_mode_t active_node) { + ActivationType active_node) { const int frame_idx = blockIdx.x * blockDim.x + threadIdx.x; if (frame_idx >= frame_size) return; int batch_idx = 0; @@ -159,7 +157,7 @@ __global__ void KeGruBackwardResetGrad(OpResetGrad op_reset_grad, T *gate_value, T *gate_grad, T *prev_out_value, T *prev_out_grad, T *reset_output_grad, int frame_size, int batch_size, - activation_mode_t active_gate) { + ActivationType active_gate) { const int frame_idx = blockIdx.x * blockDim.x + threadIdx.x; if (frame_idx >= frame_size) return; int batch_idx = 0; diff --git a/paddle/operators/math/detail/gru_kernel.h b/paddle/operators/math/detail/gru_kernel.h index acd84be01db9ddaf06d165d8be353b253f324dd2..4d8245cb5d03b33edbda5d8350be02b4fa87ab95 100644 --- a/paddle/operators/math/detail/gru_kernel.h +++ b/paddle/operators/math/detail/gru_kernel.h @@ -30,7 +30,7 @@ class gru_resetOutput { public: HOSTDEVICE void operator()(T &value_update_gate, T &value_reset_gate, T &prev_out, T &value_reset_output, - activation_mode_t act_gate) { + ActivationType act_gate) { value_update_gate = activation(value_update_gate, act_gate); value_reset_gate = activation(value_reset_gate, act_gate); value_reset_output = prev_out * value_reset_gate; @@ -43,7 +43,7 @@ class gru_resetOutput { HOSTDEVICE void operator()(__m256 &value_update_gate, __m256 &value_reset_gate, __m256 &prev_out, __m256 &value_reset_output, - activation_mode_t act_gate) { + ActivationType act_gate) { value_update_gate = activation(value_update_gate, act_gate); value_reset_gate = activation(value_reset_gate, act_gate); value_reset_output = _mm256_mul_ps(prev_out, value_reset_gate); @@ -57,7 +57,7 @@ class gru_finalOutput { public: HOSTDEVICE void operator()(T &value_update_gate, T &value_frame_state, T &prev_out, T &value_output, - activation_mode_t act_input) { + ActivationType act_input) { value_frame_state = activation(value_frame_state, act_input); value_output = prev_out - (value_update_gate * prev_out) + (value_update_gate * value_frame_state); @@ -69,8 +69,7 @@ class gru_finalOutput { static const bool avx = true; HOSTDEVICE void operator()(__m256 &value_update_gate, __m256 &value_frame_state, __m256 &prev_out, - __m256 &value_output, - activation_mode_t act_input) { + __m256 &value_output, ActivationType act_input) { value_frame_state = activation(value_frame_state, act_input); value_output = _mm256_add_ps( _mm256_sub_ps(prev_out, _mm256_mul_ps(value_update_gate, prev_out)), @@ -89,7 +88,7 @@ class gru_stateGrad { HOSTDEVICE void operator()(T &value_update_gate, T &grad_update_gate, T &value_frame_state, T &grad_frame_state, T &value_prev_out, T &grad_prev_out, - T &grad_output, activation_mode_t act_input) { + T &grad_output, ActivationType act_input) { grad_update_gate = (grad_output * value_frame_state); grad_update_gate -= (grad_output * value_prev_out); grad_prev_out -= (grad_output * value_update_gate); @@ -107,7 +106,7 @@ class gru_stateGrad { __m256 &value_frame_state, __m256 &grad_frame_state, __m256 &value_prev_out, __m256 &grad_prev_out, __m256 &grad_output, - activation_mode_t act_input) { + ActivationType act_input) { grad_update_gate = _mm256_mul_ps(grad_output, value_frame_state); grad_update_gate = _mm256_sub_ps( grad_update_gate, _mm256_mul_ps(grad_output, value_prev_out)); @@ -128,7 +127,7 @@ class gru_resetGrad { HOSTDEVICE void operator()(T &value_update_gate, T &grad_update_gate, T &value_reset_gate, T &grad_reset_gate, T &value_prev_out, T &grad_prev_out, - T &grad_reset_output, activation_mode_t act_gate) { + T &grad_reset_output, ActivationType act_gate) { grad_reset_gate = (grad_reset_output * value_prev_out); grad_prev_out += (grad_reset_output * value_reset_gate); grad_update_gate = @@ -144,7 +143,7 @@ class gru_resetGrad { __m256 &grad_update_gate, __m256 &value_reset_gate, __m256 &grad_reset_gate, __m256 &value_prev_out, __m256 &grad_prev_out, __m256 &grad_reset_output, - activation_mode_t act_gate) { + ActivationType act_gate) { grad_reset_gate = _mm256_mul_ps(grad_reset_output, value_prev_out); grad_prev_out = _mm256_add_ps( grad_prev_out, _mm256_mul_ps(grad_reset_output, value_reset_gate)); diff --git a/paddle/operators/math/detail/lstm_cpu_kernel.h b/paddle/operators/math/detail/lstm_cpu_kernel.h index a734ad31eea4816e952641bad73776d93d8c8d34..42888fcdb0a464892e3007ee73c195fcd2a431bb 100644 --- a/paddle/operators/math/detail/lstm_cpu_kernel.h +++ b/paddle/operators/math/detail/lstm_cpu_kernel.h @@ -26,10 +26,9 @@ namespace detail { template void naive_lstm_forward_one_sequence(Op op, LstmMetaValue value, - int frame_size, - activation_mode_t active_node, - activation_mode_t active_gate, - activation_mode_t active_state) { + int frame_size, ActivationType active_node, + ActivationType active_gate, + ActivationType active_state) { T r_value_in; T r_value_ig; T r_value_fg; @@ -77,9 +76,9 @@ void naive_lstm_forward_one_sequence(Op op, LstmMetaValue value, template void naive_lstm_backward_one_sequence(Op op, LstmMetaValue value, LstmMetaGrad grad, int frame_size, - activation_mode_t active_node, - activation_mode_t active_gate, - activation_mode_t active_state) { + ActivationType active_node, + ActivationType active_gate, + ActivationType active_state) { T r_value_in; T r_value_ig; T r_value_fg; @@ -149,10 +148,9 @@ void naive_lstm_backward_one_sequence(Op op, LstmMetaValue value, template void avx_lstm_forward_one_sequence(Op op, LstmMetaValue value, - int frame_size, - activation_mode_t active_node, - activation_mode_t active_gate, - activation_mode_t active_state) { + int frame_size, ActivationType active_node, + ActivationType active_gate, + ActivationType active_state) { #ifdef __AVX__ __m256 r_value_in; __m256 r_value_ig; @@ -204,9 +202,9 @@ void avx_lstm_forward_one_sequence(Op op, LstmMetaValue value, template void avx_lstm_backward_one_sequence(Op op, LstmMetaValue value, LstmMetaGrad grad, int frame_size, - activation_mode_t active_node, - activation_mode_t active_gate, - activation_mode_t active_state) { + ActivationType active_node, + ActivationType active_gate, + ActivationType active_state) { #ifdef __AVX__ __m256 r_value_in; __m256 r_value_ig; @@ -281,9 +279,8 @@ void avx_lstm_backward_one_sequence(Op op, LstmMetaValue value, template void cpu_lstm_forward(Op op, LstmMetaValue value, int frame_size, - activation_mode_t active_node, - activation_mode_t active_gate, - activation_mode_t active_state) { + ActivationType active_node, ActivationType active_gate, + ActivationType active_state) { if (Op::avx && !(frame_size & (8 - 1)) && (std::is_same::value)) { avx_lstm_forward_one_sequence(op, value, frame_size, active_node, active_gate, active_state); @@ -295,9 +292,9 @@ void cpu_lstm_forward(Op op, LstmMetaValue value, int frame_size, template void cpu_lstm_backward(Op op, LstmMetaValue value, LstmMetaGrad grad, - int frame_size, activation_mode_t active_node, - activation_mode_t active_gate, - activation_mode_t active_state) { + int frame_size, ActivationType active_node, + ActivationType active_gate, + ActivationType active_state) { if (Op::avx && !(frame_size & (8 - 1)) && (std::is_same::value)) { avx_lstm_backward_one_sequence(op, value, grad, frame_size, active_node, active_gate, active_state); diff --git a/paddle/operators/math/detail/lstm_gpu_kernel.h b/paddle/operators/math/detail/lstm_gpu_kernel.h index 91bfedea53a2600156c9025f6ff3615d695a712b..e31e657e8b6964c2b99f6e456545c83d8da8e7f9 100644 --- a/paddle/operators/math/detail/lstm_gpu_kernel.h +++ b/paddle/operators/math/detail/lstm_gpu_kernel.h @@ -31,9 +31,9 @@ namespace detail { */ template __global__ void KeLstmForward(Op op, LstmMetaValue value, int frame_size, - int batch_size, activation_mode_t active_node, - activation_mode_t active_gate, - activation_mode_t active_state) { + int batch_size, ActivationType active_node, + ActivationType active_gate, + ActivationType active_state) { const int frame_idx = blockIdx.x * blockDim.x + threadIdx.x; if (frame_idx >= frame_size) return; @@ -91,9 +91,9 @@ __global__ void KeLstmForward(Op op, LstmMetaValue value, int frame_size, template __global__ void KeLstmBackward(Op op, LstmMetaValue value, LstmMetaGrad grad, int frame_size, - int batch_size, activation_mode_t active_node, - activation_mode_t active_gate, - activation_mode_t active_state) { + int batch_size, ActivationType active_node, + ActivationType active_gate, + ActivationType active_state) { const int frame_idx = blockIdx.x * blockDim.x + threadIdx.x; if (frame_idx >= frame_size) return; @@ -185,9 +185,8 @@ __global__ void KeLstmBackward(Op op, LstmMetaValue value, template void gpu_lstm_forward(const platform::DeviceContext& context, Op op, LstmMetaValue value, int frame_size, int batch_size, - activation_mode_t active_node, - activation_mode_t active_gate, - activation_mode_t active_state) { + ActivationType active_node, ActivationType active_gate, + ActivationType active_state) { dim3 threads; dim3 grid; if (batch_size == 1) { @@ -220,9 +219,8 @@ template void gpu_lstm_backward(const platform::DeviceContext& context, Op op, LstmMetaValue value, LstmMetaGrad grad, int frame_size, int batch_size, - activation_mode_t active_node, - activation_mode_t active_gate, - activation_mode_t active_state) { + ActivationType active_node, ActivationType active_gate, + ActivationType active_state) { dim3 threads; dim3 grid; if (batch_size == 1) { diff --git a/paddle/operators/math/detail/lstm_kernel.h b/paddle/operators/math/detail/lstm_kernel.h index 78f9a249a3d5d413452952edf990975c02f1a369..fed8f9c4ca48905ad4c524ba400e8c7bb2f7fbd1 100644 --- a/paddle/operators/math/detail/lstm_kernel.h +++ b/paddle/operators/math/detail/lstm_kernel.h @@ -30,9 +30,9 @@ class lstm { HOSTDEVICE void operator()(T &value_in, T &value_ig, T &value_fg, T &value_og, T &prev_state, T &state, T &state_atv, T &output, T &checkI, T &checkF, T &checkO, - activation_mode_t active_node, - activation_mode_t active_gate, - activation_mode_t active_state) { + ActivationType active_node, + ActivationType active_gate, + ActivationType active_state) { value_in = activation(value_in, active_node); value_ig = activation(value_ig + prev_state * checkI, active_gate); value_fg = activation(value_fg + prev_state * checkF, active_gate); @@ -53,9 +53,9 @@ class lstm { __m256 &prev_state, __m256 &state, __m256 &state_atv, __m256 &output, __m256 &checkI, __m256 &checkF, __m256 &checkO, - activation_mode_t active_node, - activation_mode_t active_gate, - activation_mode_t active_state) { + ActivationType active_node, + ActivationType active_gate, + ActivationType active_state) { value_in = activation(value_in, active_node); value_ig = activation(_mm256_add_ps(value_ig, _mm256_mul_ps(prev_state, checkI)), @@ -87,9 +87,9 @@ class lstm { T &state_grad, T &state_atv, T &output_grad, T &checkI, T &checkF, T &checkO, T &checkIGrad, T &checkFGrad, T &checkOGrad, - activation_mode_t active_node, - activation_mode_t active_gate, - activation_mode_t active_state) { + ActivationType active_node, + ActivationType active_gate, + ActivationType active_state) { grad_og = activation(output_grad * state_atv, value_og, active_gate); state_grad += activation(output_grad * value_og, state_atv, active_state) + grad_og * checkO; @@ -114,8 +114,8 @@ class lstm { __m256 &prev_state, __m256 &prev_state_grad, __m256 &state, __m256 &state_grad, __m256 &state_atv, __m256 &output_grad, __m256 &checkI, __m256 &checkF, __m256 &checkO, __m256 &checkIGrad, - __m256 &checkFGrad, __m256 &checkOGrad, activation_mode_t active_node, - activation_mode_t active_gate, activation_mode_t active_state) { + __m256 &checkFGrad, __m256 &checkOGrad, ActivationType active_node, + ActivationType active_gate, ActivationType active_state) { grad_og = activation(_mm256_mul_ps(output_grad, state_atv), value_og, active_gate); state_grad = _mm256_add_ps(activation(_mm256_mul_ps(output_grad, value_og), diff --git a/paddle/operators/math/detection_util.h b/paddle/operators/math/detection_util.h new file mode 100644 index 0000000000000000000000000000000000000000..e3a3ef2badc37924d866ded8ee7a7338fbc4b2d2 --- /dev/null +++ b/paddle/operators/math/detection_util.h @@ -0,0 +1,300 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once +#include +#include "paddle/framework/selected_rows.h" +#include "paddle/platform/device_context.h" + +namespace paddle { +namespace operators { +namespace math { +template +struct BBox { + BBox(T x_min, T y_min, T x_max, T y_max) + : x_min(x_min), + y_min(y_min), + x_max(x_max), + y_max(y_max), + is_difficult(false) {} + + BBox() {} + + T get_width() const { return x_max - x_min; } + + T get_height() const { return y_max - y_min; } + + T get_center_x() const { return (x_min + x_max) / 2; } + + T get_center_y() const { return (y_min + y_max) / 2; } + + T get_area() const { return get_width() * get_height(); } + + // coordinate of bounding box + T x_min; + T y_min; + T x_max; + T y_max; + // whether difficult object (e.g. object with heavy occlusion is difficult) + bool is_difficult; +}; +// KNCHW ==> NHWC +// template +template +void GetBBoxFromPriorData(const T* prior_data, const size_t num_bboxes, + std::vector>& bbox_vec); +template +void GetBBoxVarFromPriorData(const T* prior_data, const size_t num, + std::vector>& var_vec); +template +BBox DecodeBBoxWithVar(BBox& prior_bbox, + const std::vector& prior_bbox_var, + const std::vector& loc_pred_data); +template +bool SortScorePairDescend(const std::pair& pair1, + const std::pair& pair2); +template +bool SortScorePairDescend(const std::pair>& pair1, + const std::pair>& pair2); +template +T jaccard_overlap(const BBox& bbox1, const BBox& bbox2); + +template +void ApplyNmsFast(const std::vector>& bboxes, const T* conf_score_data, + size_t class_idx, size_t top_k, T conf_threshold, + T nms_threshold, size_t num_priors, size_t num_classes, + std::vector* indices); +template +int GetDetectionIndices( + const T* conf_data, const size_t num_priors, const size_t num_classes, + const size_t background_label_id, const size_t batch_size, + const T conf_threshold, const size_t nms_top_k, const T nms_threshold, + const size_t top_k, + const std::vector>>& all_decoded_bboxes, + std::vector>>* all_detection_indices); +template +BBox ClipBBox(const BBox& bbox); +template +void GetDetectionOutput( + const T* conf_data, const size_t num_kept, const size_t num_priors, + const size_t num_classes, const size_t batch_size, + const std::vector>>& all_indices, + const std::vector>>& all_decoded_bboxes, T* out_data); +template +void GetBBoxFromPriorData(const T* prior_data, const size_t num_bboxes, + std::vector>& bbox_vec) { + size_t out_offset = bbox_vec.size(); + bbox_vec.resize(bbox_vec.size() + num_bboxes); + for (size_t i = 0; i < num_bboxes; ++i) { + BBox bbox; + bbox.x_min = *(prior_data + i * 8); + bbox.y_min = *(prior_data + i * 8 + 1); + bbox.x_max = *(prior_data + i * 8 + 2); + bbox.y_max = *(prior_data + i * 8 + 3); + bbox_vec[out_offset + i] = bbox; + } +} +template +void GetBBoxVarFromPriorData(const T* prior_data, const size_t num, + std::vector>& var_vec) { + size_t out_offset = var_vec.size(); + var_vec.resize(var_vec.size() + num); + for (size_t i = 0; i < num; ++i) { + std::vector var; + var.push_back(*(prior_data + i * 8 + 4)); + var.push_back(*(prior_data + i * 8 + 5)); + var.push_back(*(prior_data + i * 8 + 6)); + var.push_back(*(prior_data + i * 8 + 7)); + var_vec[out_offset + i] = var; + } +} +template +BBox DecodeBBoxWithVar(BBox& prior_bbox, + const std::vector& prior_bbox_var, + const std::vector& loc_pred_data) { + T prior_bbox_width = prior_bbox.get_width(); + T prior_bbox_height = prior_bbox.get_height(); + T prior_bbox_center_x = prior_bbox.get_center_x(); + T prior_bbox_center_y = prior_bbox.get_center_y(); + + T decoded_bbox_center_x = + prior_bbox_var[0] * loc_pred_data[0] * prior_bbox_width + + prior_bbox_center_x; + T decoded_bbox_center_y = + prior_bbox_var[1] * loc_pred_data[1] * prior_bbox_height + + prior_bbox_center_y; + T decoded_bbox_width = + std::exp(prior_bbox_var[2] * loc_pred_data[2]) * prior_bbox_width; + T decoded_bbox_height = + std::exp(prior_bbox_var[3] * loc_pred_data[3]) * prior_bbox_height; + + BBox decoded_bbox; + decoded_bbox.x_min = decoded_bbox_center_x - decoded_bbox_width / 2; + decoded_bbox.y_min = decoded_bbox_center_y - decoded_bbox_height / 2; + decoded_bbox.x_max = decoded_bbox_center_x + decoded_bbox_width / 2; + decoded_bbox.y_max = decoded_bbox_center_y + decoded_bbox_height / 2; + + return decoded_bbox; +} +template +bool SortScorePairDescend(const std::pair& pair1, + const std::pair& pair2) { + return pair1.first > pair2.first; +} +template +T jaccard_overlap(const BBox& bbox1, const BBox& bbox2) { + if (bbox2.x_min > bbox1.x_max || bbox2.x_max < bbox1.x_min || + bbox2.y_min > bbox1.y_max || bbox2.y_max < bbox1.y_min) { + return 0.0; + } else { + T inter_x_min = std::max(bbox1.x_min, bbox2.x_min); + T inter_y_min = std::max(bbox1.y_min, bbox2.y_min); + T interX_max = std::min(bbox1.x_max, bbox2.x_max); + T interY_max = std::min(bbox1.y_max, bbox2.y_max); + + T inter_width = interX_max - inter_x_min; + T inter_height = interY_max - inter_y_min; + T inter_area = inter_width * inter_height; + + T bbox_area1 = bbox1.get_area(); + T bbox_area2 = bbox2.get_area(); + + return inter_area / (bbox_area1 + bbox_area2 - inter_area); + } +} + +template +void ApplyNmsFast(const std::vector>& bboxes, const T* conf_score_data, + size_t class_idx, size_t top_k, T conf_threshold, + T nms_threshold, size_t num_priors, size_t num_classes, + std::vector* indices) { + std::vector> scores; + for (size_t i = 0; i < num_priors; ++i) { + size_t conf_offset = i * num_classes + class_idx; + if (conf_score_data[conf_offset] > conf_threshold) + scores.push_back(std::make_pair(conf_score_data[conf_offset], i)); + } + std::stable_sort(scores.begin(), scores.end(), + SortScorePairDescend); + if (top_k > 0 && top_k < scores.size()) scores.resize(top_k); + while (scores.size() > 0) { + const size_t idx = scores.front().second; + bool keep = true; + for (size_t i = 0; i < indices->size(); ++i) { + if (keep) { + const size_t saved_idx = (*indices)[i]; + T overlap = jaccard_overlap(bboxes[idx], bboxes[saved_idx]); + keep = overlap <= nms_threshold; + } else { + break; + } + } + if (keep) indices->push_back(idx); + scores.erase(scores.begin()); + } +} +template +int GetDetectionIndices( + const T* conf_data, const size_t num_priors, const size_t num_classes, + const size_t background_label_id, const size_t batch_size, + const T conf_threshold, const size_t nms_top_k, const T nms_threshold, + const size_t top_k, + const std::vector>>& all_decoded_bboxes, + std::vector>>* all_detection_indices) { + int total_keep_num = 0; + for (size_t n = 0; n < batch_size; ++n) { + const std::vector>& decoded_bboxes = all_decoded_bboxes[n]; + size_t num_detected = 0; + std::map> indices; + size_t conf_offset = n * num_priors * num_classes; + for (size_t c = 0; c < num_classes; ++c) { + if (c == background_label_id) continue; + ApplyNmsFast(decoded_bboxes, conf_data + conf_offset, c, nms_top_k, + conf_threshold, nms_threshold, num_priors, num_classes, + &(indices[c])); + num_detected += indices[c].size(); + } + if (top_k > 0 && num_detected > top_k) { + // std::vector> score_index_pairs; + std::vector>> score_index_pairs; + for (size_t c = 0; c < num_classes; ++c) { + const std::vector& label_indices = indices[c]; + for (size_t i = 0; i < label_indices.size(); ++i) { + size_t idx = label_indices[i]; + score_index_pairs.push_back( + std::make_pair((conf_data + conf_offset)[idx * num_classes + c], + std::make_pair(c, idx))); + } + } + std::sort(score_index_pairs.begin(), score_index_pairs.end(), + SortScorePairDescend>); + score_index_pairs.resize(top_k); + std::map> new_indices; + for (size_t i = 0; i < score_index_pairs.size(); ++i) { + size_t label = score_index_pairs[i].second.first; + size_t idx = score_index_pairs[i].second.second; + new_indices[label].push_back(idx); + } + all_detection_indices->push_back(new_indices); + total_keep_num += top_k; + } else { + all_detection_indices->push_back(indices); + total_keep_num += num_detected; + } + } + return total_keep_num; +} +template +BBox ClipBBox(const BBox& bbox) { + T one = static_cast(1.0); + T zero = static_cast(0.0); + BBox clipped_bbox; + clipped_bbox.x_min = std::max(std::min(bbox.x_min, one), zero); + clipped_bbox.y_min = std::max(std::min(bbox.y_min, one), zero); + clipped_bbox.x_max = std::max(std::min(bbox.x_max, one), zero); + clipped_bbox.y_max = std::max(std::min(bbox.y_max, one), zero); + return clipped_bbox; +} +template +void GetDetectionOutput( + const T* conf_data, const size_t num_kept, const size_t num_priors, + const size_t num_classes, const size_t batch_size, + const std::vector>>& all_indices, + const std::vector>>& all_decoded_bboxes, T* out_data) { + size_t count = 0; + for (size_t n = 0; n < batch_size; ++n) { + for (std::map>::const_iterator it = + all_indices[n].begin(); + it != all_indices[n].end(); ++it) { + size_t label = it->first; + const std::vector& indices = it->second; + const std::vector>& decoded_bboxes = all_decoded_bboxes[n]; + for (size_t i = 0; i < indices.size(); ++i) { + size_t idx = indices[i]; + size_t conf_offset = n * num_priors * num_classes + idx * num_classes; + out_data[count * 7] = n; + out_data[count * 7 + 1] = label; + out_data[count * 7 + 2] = (conf_data + conf_offset)[label]; + BBox clipped_bbox = ClipBBox(decoded_bboxes[idx]); + out_data[count * 7 + 3] = clipped_bbox.x_min; + out_data[count * 7 + 4] = clipped_bbox.y_min; + out_data[count * 7 + 5] = clipped_bbox.x_max; + out_data[count * 7 + 6] = clipped_bbox.y_max; + ++count; + } + } + } +} +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/gru_compute.cc b/paddle/operators/math/gru_compute.cc index d570c68cd458914c8951c4ce50a02e3c5b1acab0..101ab859624869bf34d171cd42d46d0c5bdac29c 100644 --- a/paddle/operators/math/gru_compute.cc +++ b/paddle/operators/math/gru_compute.cc @@ -21,9 +21,9 @@ namespace math { template struct GRUUnitFunctor { static void compute(const platform::CPUDeviceContext &context, - hl_gru_value value, int frame_size, int batch_size, - activation_mode_t active_node, - activation_mode_t active_gate) { + GRUMetaValue value, int frame_size, int batch_size, + const detail::ActivationType active_node, + const detail::ActivationType active_gate) { #ifndef __NVCC__ if (value.prev_out_value) { math::gemm( @@ -51,10 +51,10 @@ struct GRUUnitFunctor { template struct GRUUnitGradFunctor { static void compute(const platform::CPUDeviceContext &context, - hl_gru_value value, hl_gru_grad grad, + GRUMetaValue value, GRUMetaGrad grad, int frame_size, int batch_size, - activation_mode_t active_node, - activation_mode_t active_gate) { + const detail::ActivationType active_node, + const detail::ActivationType active_gate) { #ifndef __NVCC__ detail::backward_state_grad(detail::backward::gru_stateGrad(), value, grad, frame_size, batch_size, active_node); diff --git a/paddle/operators/math/gru_compute.cu b/paddle/operators/math/gru_compute.cu index dd518cd1e4bea52f0d463150114feed3ceea0ccb..d5a0e630ea0eadea990988c3170395c842a91900 100644 --- a/paddle/operators/math/gru_compute.cu +++ b/paddle/operators/math/gru_compute.cu @@ -21,9 +21,9 @@ namespace math { template struct GRUUnitFunctor { static void compute(const platform::CUDADeviceContext &context, - hl_gru_value value, int frame_size, int batch_size, - activation_mode_t active_node, - activation_mode_t active_gate) { + GRUMetaValue value, int frame_size, int batch_size, + const detail::ActivationType active_node, + const detail::ActivationType active_gate) { auto stream = context.stream(); dim3 threads; dim3 grid; @@ -88,10 +88,10 @@ struct GRUUnitFunctor { template struct GRUUnitGradFunctor { static void compute(const platform::CUDADeviceContext &context, - hl_gru_value value, hl_gru_grad grad, + GRUMetaValue value, GRUMetaGrad grad, int frame_size, int batch_size, - activation_mode_t active_node, - activation_mode_t active_gate) { + const detail::ActivationType active_node, + const detail::ActivationType active_gate) { auto stream = context.stream(); dim3 threads; dim3 grid; diff --git a/paddle/operators/math/gru_compute.h b/paddle/operators/math/gru_compute.h index ca1343cb2c5c1eb8da92c2f06b25902c1c2fe8b3..bf69147b506661692a6d71823043cd3506ea8b5d 100644 --- a/paddle/operators/math/gru_compute.h +++ b/paddle/operators/math/gru_compute.h @@ -11,7 +11,7 @@ limitations under the License. */ #pragma once -#include "paddle/operators/math/lstm_compute.h" +#include "paddle/operators/math/detail/activation_functions.h" #include "paddle/platform/device_context.h" #include "paddle/platform/enforce.h" @@ -19,9 +19,8 @@ namespace paddle { namespace operators { namespace math { -// TODO(guosheng): refine code style in gru_compute template -struct hl_gru_value { +struct GRUMetaValue { T *gate_weight; T *state_weight; T *gate_value; @@ -31,7 +30,7 @@ struct hl_gru_value { }; template -struct hl_gru_grad { +struct GRUMetaGrad { T *gate_weight_grad; T *state_weight_grad; T *gate_grad; @@ -42,18 +41,18 @@ struct hl_gru_grad { template struct GRUUnitFunctor { - static void compute(const DeviceContext &context, hl_gru_value value, + static void compute(const DeviceContext &context, GRUMetaValue value, int frame_size, int batch_size, - activation_mode_t active_node, - activation_mode_t active_gate); + const detail::ActivationType active_node, + const detail::ActivationType active_gate); }; template struct GRUUnitGradFunctor { - static void compute(const DeviceContext &context, hl_gru_value value, - hl_gru_grad grad, int frame_size, int batch_size, - activation_mode_t active_node, - activation_mode_t active_gate); + static void compute(const DeviceContext &context, GRUMetaValue value, + GRUMetaGrad grad, int frame_size, int batch_size, + const detail::ActivationType active_node, + const detail::ActivationType active_gate); }; } // namespace math diff --git a/paddle/operators/math/im2col_test.cc b/paddle/operators/math/im2col_test.cc index 256f3bc9bd487d11b0f139ef057f5a98556b4db1..26c038e435827b401d723ee6eef2255a89670f46 100644 --- a/paddle/operators/math/im2col_test.cc +++ b/paddle/operators/math/im2col_test.cc @@ -159,6 +159,7 @@ void testIm2col() { TEST(math, im2col) { testIm2col(); #ifdef PADDLE_WITH_CUDA - testIm2col(); + testIm2col(); #endif } diff --git a/paddle/operators/math/lstm_compute.cc b/paddle/operators/math/lstm_compute.cc index 2c2e8bb82e6f51e21a00de53bbfce5f0b4868e27..d453102ecefc9d79e1f4474ba94be0eb69a87c85 100644 --- a/paddle/operators/math/lstm_compute.cc +++ b/paddle/operators/math/lstm_compute.cc @@ -24,12 +24,12 @@ template struct LstmUnitFunctor { static void compute(const platform::CPUDeviceContext& context, LstmMetaValue value, int frame_size, int batch_size, - const std::string& gate_act, const std::string& cell_act, - const std::string& cand_act) { + const detail::ActivationType& gate_act, + const detail::ActivationType& cell_act, + const detail::ActivationType& cand_act) { for (int b = 0; b < batch_size; b++) { detail::cpu_lstm_forward(detail::forward::lstm(), value, frame_size, - ActiveType(cand_act), ActiveType(gate_act), - ActiveType(cell_act)); + cand_act, gate_act, cell_act); value.gate_value += frame_size * 4; value.state_value += frame_size; value.state_active_value += frame_size; @@ -46,12 +46,12 @@ struct LstmUnitGradFunctor { static void compute(const platform::CPUDeviceContext& context, LstmMetaValue value, LstmMetaGrad grad, int frame_size, int batch_size, - const std::string& gate_act, const std::string& cell_act, - const std::string& cand_act) { + const detail::ActivationType& gate_act, + const detail::ActivationType& cell_act, + const detail::ActivationType& cand_act) { for (int b = 0; b < batch_size; b++) { detail::cpu_lstm_backward(detail::backward::lstm(), value, grad, - frame_size, ActiveType(cand_act), - ActiveType(gate_act), ActiveType(cell_act)); + frame_size, cand_act, gate_act, cell_act); value.gate_value += frame_size * 4; value.state_value += frame_size; diff --git a/paddle/operators/math/lstm_compute.cu b/paddle/operators/math/lstm_compute.cu index 92b1f4228b49709d2903fab518e7649133932fad..82065d699f760db6cc86bf3d6c56e51c583c6ace 100644 --- a/paddle/operators/math/lstm_compute.cu +++ b/paddle/operators/math/lstm_compute.cu @@ -24,11 +24,12 @@ template struct LstmUnitFunctor { static void compute(const platform::CUDADeviceContext& context, LstmMetaValue value, int frame_size, int batch_size, - const std::string& gate_act, const std::string& cell_act, - const std::string& cand_act) { + const detail::ActivationType& gate_act, + const detail::ActivationType& cell_act, + const detail::ActivationType& cand_act) { detail::gpu_lstm_forward(context, detail::forward::lstm(), value, - frame_size, batch_size, ActiveType(cand_act), - ActiveType(gate_act), ActiveType(cell_act)); + frame_size, batch_size, cand_act, gate_act, + cell_act); } }; @@ -37,11 +38,12 @@ struct LstmUnitGradFunctor { static void compute(const platform::CUDADeviceContext& context, LstmMetaValue value, LstmMetaGrad grad, int frame_size, int batch_size, - const std::string& gate_act, const std::string& cell_act, - const std::string& cand_act) { + const detail::ActivationType& gate_act, + const detail::ActivationType& cell_act, + const detail::ActivationType& cand_act) { detail::gpu_lstm_backward(context, detail::backward::lstm(), value, grad, - frame_size, batch_size, ActiveType(cand_act), - ActiveType(gate_act), ActiveType(cell_act)); + frame_size, batch_size, cand_act, gate_act, + cell_act); } }; diff --git a/paddle/operators/math/lstm_compute.h b/paddle/operators/math/lstm_compute.h index 5f74e273585aea5184281bf294df694235150e30..e1ad6b64d201ef99d83eaa2a821356008dcc9c8e 100644 --- a/paddle/operators/math/lstm_compute.h +++ b/paddle/operators/math/lstm_compute.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include "paddle/operators/math/detail/activation_functions.h" #include "paddle/platform/device_context.h" #include "paddle/platform/enforce.h" @@ -21,14 +22,6 @@ namespace paddle { namespace operators { namespace math { -typedef enum { - HL_ACTIVATION_SIGMOID = 0, - HL_ACTIVATION_RELU = 1, - HL_ACTIVATION_TANH = 2, - HL_ACTIVATION_LINEAR = 3, - HL_ACTIVATION_END -} activation_mode_t; - template struct LstmMetaValue { T *gate_value; @@ -53,27 +46,14 @@ struct LstmMetaGrad { T *check_og_grad; }; -inline activation_mode_t ActiveType(const std::string &type) { - if (type == "sigmoid") { - return HL_ACTIVATION_SIGMOID; - } else if (type == "relu") { - return HL_ACTIVATION_RELU; - } else if (type == "tanh") { - return HL_ACTIVATION_TANH; - } else if (type == "linear" || type == "identity" || type == "") { - return HL_ACTIVATION_LINEAR; - } else { - PADDLE_THROW("Do not support activation type."); - } -} - template class LstmUnitFunctor { public: static void compute(const DeviceContext &context, LstmMetaValue value, int frame_size, int batch_size, - const std::string &gate_act, const std::string &cell_act, - const std::string &cand_act); + const detail::ActivationType &gate_act, + const detail::ActivationType &cell_act, + const detail::ActivationType &cand_act); }; template @@ -81,8 +61,9 @@ class LstmUnitGradFunctor { public: static void compute(const DeviceContext &context, LstmMetaValue value, LstmMetaGrad grad, int frame_size, int batch_size, - const std::string &gate_act, const std::string &cell_act, - const std::string &cand_act); + const detail::ActivationType &gate_act, + const detail::ActivationType &cell_act, + const detail::ActivationType &cand_act); }; } // namespace math diff --git a/paddle/operators/math/math_function.cc b/paddle/operators/math/math_function.cc index a05810d7781f5286e70b53005ef0b193c945c54c..dcf4b85e1aadf88e4b1ca70ac7e8b5416fc58cd8 100644 --- a/paddle/operators/math/math_function.cc +++ b/paddle/operators/math/math_function.cc @@ -245,9 +245,12 @@ template struct SetConstant; template struct SetConstant; template struct SetConstant; -#define DEFINE_CPU_TRANS(RANK) \ - template struct Transpose; \ - template struct Transpose; +#define DEFINE_CPU_TRANS(RANK) \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; DEFINE_CPU_TRANS(1); DEFINE_CPU_TRANS(2); @@ -277,14 +280,6 @@ void set_constant_with_place( TensorSetConstantCPU(tensor, value)); } -template <> -void set_constant_with_place( - const platform::DeviceContext& context, framework::Tensor* tensor, - float value) { - framework::VisitDataType(framework::ToDataType(tensor->type()), - TensorSetConstantCPU(tensor, value)); -} - struct TensorSetConstantWithPlace : public boost::static_visitor { TensorSetConstantWithPlace(const platform::DeviceContext& context, framework::Tensor* tensor, float value) @@ -310,8 +305,29 @@ void set_constant(const platform::DeviceContext& context, #endif } +template +struct RowwiseAdd { + void operator()(const platform::CPUDeviceContext& context, + const framework::Tensor& input, + const framework::Tensor& vector, framework::Tensor* output) { + auto in_dims = input.dims(); + auto size = input.numel() / in_dims[0]; + PADDLE_ENFORCE_EQ(vector.numel(), size); + PADDLE_ENFORCE_EQ(output->dims(), in_dims); + + auto in = framework::EigenMatrix::From(input); + auto vec = framework::EigenVector::Flatten(vector); + auto out = framework::EigenMatrix::From(*output); + + for (int64_t i = 0; i < in_dims[0]; ++i) { + out.chip(i, 0) = in.chip(i, 0) + vec; + } + } +}; + template struct RowwiseAdd; template struct RowwiseAdd; + template struct ColwiseSum; template struct ColwiseSum; diff --git a/paddle/operators/math/math_function.cu b/paddle/operators/math/math_function.cu index 7852bb53a9035f71f52a51529c8e3cea22b0d4aa..d47a7f818ded61baf31e46ea3b8ae3101324111f 100644 --- a/paddle/operators/math/math_function.cu +++ b/paddle/operators/math/math_function.cu @@ -105,7 +105,7 @@ void matmul( PADDLE_ENFORCE(platform::is_gpu_place(matrix_a.place()) && platform::is_gpu_place(matrix_b.place()) && platform::is_gpu_place(matrix_out->place()), - "Matrix must all be in GPUPlace"); + "Matrix must all be in CUDAPlace"); int M = dim_out[0]; int N = dim_out[1]; @@ -134,7 +134,7 @@ void matmul( PADDLE_ENFORCE(platform::is_gpu_place(matrix_a.place()) && platform::is_gpu_place(matrix_b.place()) && platform::is_gpu_place(matrix_out->place()), - "Matrix must all be in GPUPlace"); + "Matrix must all be in CUDAPlace"); int M = dim_out[0]; int N = dim_out[1]; @@ -266,20 +266,42 @@ struct TensorSetConstantGPU { }; template <> -void set_constant_with_place( +void set_constant_with_place( const platform::DeviceContext& context, framework::Tensor* tensor, float value) { framework::VisitDataType(framework::ToDataType(tensor->type()), TensorSetConstantGPU(context, tensor, value)); } -template <> -void set_constant_with_place( - const platform::DeviceContext& context, framework::Tensor* tensor, - float value) { - set_constant_with_place(context, tensor, value); +template +__global__ void RowwiseAddKernel(const T* a, const T* b, T* c, int width, + int num) { + T tmp = 1.0 / width; + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < num; + i += blockDim.x * gridDim.x) { + int h = i * tmp; + int w = i - h * width; + c[i] = a[i] + b[w]; + } } +template +struct RowwiseAdd { + void operator()(const platform::CUDADeviceContext& context, + const framework::Tensor& input, + const framework::Tensor& vector, framework::Tensor* output) { + auto in_dims = input.dims(); + auto size = input.numel() / in_dims[0]; + PADDLE_ENFORCE_EQ(vector.numel(), size); + PADDLE_ENFORCE_EQ(output->dims(), in_dims); + int blocks = 512; + int grids = (input.numel() + blocks - 1) / blocks; + RowwiseAddKernel<<>>( + input.data(), vector.data(), output->data(), + static_cast(in_dims[1]), static_cast(input.numel())); + } +}; + template struct RowwiseAdd; template struct RowwiseAdd; template struct ColwiseSum; diff --git a/paddle/operators/math/math_function_impl.h b/paddle/operators/math/math_function_impl.h index aced2690bce9a4c2db6309f18e23a8f6cd0211f3..de591626df28e2bc3391b609f909612411398247 100644 --- a/paddle/operators/math/math_function_impl.h +++ b/paddle/operators/math/math_function_impl.h @@ -45,25 +45,6 @@ void Transpose::operator()( eigen_out.device(*dev) = eigen_in.shuffle(permute); } -template -void RowwiseAdd::operator()(const DeviceContext& context, - const framework::Tensor& input, - const framework::Tensor& vector, - framework::Tensor* output) { - auto in_dims = input.dims(); - auto size = input.numel() / in_dims[0]; - PADDLE_ENFORCE_EQ(vector.numel(), size); - PADDLE_ENFORCE_EQ(output->dims(), in_dims); - - auto in = framework::EigenMatrix::From(input); - auto vec = framework::EigenMatrix::From(vector); - auto out = framework::EigenMatrix::From(*output); - Eigen::array shape({{1, static_cast(size)}}); - Eigen::array bcast({{static_cast(in_dims[0]), 1}}); - out.device(*context.eigen_device()) = - in + vec.reshape(shape).broadcast(bcast); -} - template void ColwiseSum::operator()(const DeviceContext& context, const framework::Tensor& input, @@ -94,8 +75,8 @@ class ColwiseSum { T* out_buf = out->mutable_data(out->place()); const T* in_buf = input.data(); - for (size_t i = 0; i < height; ++i) { - for (size_t j = 0; j < size; ++j) { + for (size_t i = 0; i < static_cast(height); ++i) { + for (size_t j = 0; j < static_cast(size); ++j) { if (i == 0) { out_buf[j] = in_buf[i * size + j]; } else { diff --git a/paddle/operators/math/math_function_test.cu b/paddle/operators/math/math_function_test.cu index 32e96d948714a8fd1fa2c089057603fdaed85c16..4325a79664f15cfaea48870cd503ce70cc31044f 100644 --- a/paddle/operators/math/math_function_test.cu +++ b/paddle/operators/math/math_function_test.cu @@ -13,7 +13,7 @@ TEST(math_function, notrans_mul_trans) { float arr[6] = {0, 1, 2, 3, 4, 5}; memcpy(input1_ptr, arr, 6 * sizeof(float)); - auto* gpu_place = new paddle::platform::GPUPlace(0); + auto* gpu_place = new paddle::platform::CUDAPlace(0); paddle::platform::CUDADeviceContext context(*gpu_place); paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu); @@ -47,7 +47,7 @@ TEST(math_function, trans_mul_notrans) { float arr[6] = {0, 1, 2, 3, 4, 5}; memcpy(input1_ptr, arr, 6 * sizeof(float)); - auto* gpu_place = new paddle::platform::GPUPlace(0); + auto* gpu_place = new paddle::platform::CUDAPlace(0); paddle::platform::CUDADeviceContext context(*gpu_place); paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu); @@ -96,7 +96,7 @@ TEST(math_function, gemm_notrans_cublas) { float arr3[8] = {0, 1, 2, 3, 4, 5, 6, 7}; memcpy(input3_ptr, arr3, 8 * sizeof(float)); - auto* gpu_place = new paddle::platform::GPUPlace(0); + auto* gpu_place = new paddle::platform::CUDAPlace(0); paddle::platform::CUDADeviceContext context(*gpu_place); paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu); @@ -151,7 +151,7 @@ TEST(math_function, gemm_trans_cublas) { float arr3[8] = {0, 1, 2, 3, 4, 5, 6, 7}; memcpy(input3_ptr, arr3, 8 * sizeof(float)); - auto* gpu_place = new paddle::platform::GPUPlace(0); + auto* gpu_place = new paddle::platform::CUDAPlace(0); paddle::platform::CUDADeviceContext context(*gpu_place); paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu); @@ -189,7 +189,7 @@ void GemvTest(int m, int n, bool trans) { T* data_b = vec_b.mutable_data({trans ? m : n}, *cpu_place); T* data_c = vec_c.mutable_data({trans ? n : m}, *cpu_place); - auto* gpu_place = new paddle::platform::GPUPlace(0); + auto* gpu_place = new paddle::platform::CUDAPlace(0); paddle::framework::Tensor g_mat_a; paddle::framework::Tensor g_vec_b; paddle::framework::Tensor g_vec_c; diff --git a/paddle/operators/math/selected_rows_functor.cc b/paddle/operators/math/selected_rows_functor.cc index ab758d1e7fd8ab361948b28e8cb735b9a742a339..8a1ebb58c26578f076bf243adfbd51d10c682b99 100644 --- a/paddle/operators/math/selected_rows_functor.cc +++ b/paddle/operators/math/selected_rows_functor.cc @@ -12,8 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/math/selected_rows_functor.h" +#include + #include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/selected_rows_functor.h" namespace paddle { namespace operators { @@ -179,6 +181,118 @@ template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; +// This is a separated namespace for manipulate SelectedRows typed +// data. Like merge duplicated rows, adding two SelectedRows etc. +// +// Another group of functors is called "scatter updates", which means +// use SelectedRows to update a dense tensor with different Ops, like +// add or mul. +namespace scatter { + +size_t FindPos(const std::vector& rows, int64_t value) { + return std::find(rows.begin(), rows.end(), value) - rows.begin(); +} + +template +struct MergeAdd { + framework::SelectedRows operator()(const platform::CPUDeviceContext& context, + const framework::SelectedRows& input) { + framework::SelectedRows out; + auto input_rows = input.rows(); + std::set row_set(input_rows.begin(), input_rows.end()); + std::vector merge_rows(row_set.begin(), row_set.end()); + + auto input_width = input.value().dims()[1]; + out.set_rows(merge_rows); + out.set_height(input.height()); + out.mutable_value()->mutable_data( + framework::make_ddim( + {static_cast(merge_rows.size()), input_width}), + context.GetPlace()); + + math::SetConstant constant_functor; + constant_functor(context, out.mutable_value(), 0.0); + + auto* out_data = out.mutable_value()->data(); + auto* input_data = input.value().data(); + + for (size_t i = 0; i < input_rows.size(); i++) { + size_t out_i = FindPos(merge_rows, input_rows[i]); + for (int64_t j = 0; j < input_width; j++) { + out_data[out_i * input_width + j] += input_data[i * input_width + j]; + } + } + return out; + } +}; + +template struct MergeAdd; +template struct MergeAdd; +template struct MergeAdd; +template struct MergeAdd; + +template +struct UpdateToTensor { + void operator()(const platform::CPUDeviceContext& context, + const ScatterOps& op, const framework::SelectedRows& input1, + framework::Tensor* input2) { + auto in1_height = input1.height(); + auto in2_dims = input2->dims(); + PADDLE_ENFORCE_EQ(in1_height, in2_dims[0]); + + auto& in1_value = input1.value(); + auto& in1_rows = input1.rows(); + + int64_t in1_row_numel = in1_value.numel() / in1_rows.size(); + PADDLE_ENFORCE_EQ(in1_row_numel, input2->numel() / in1_height); + + auto* in1_data = in1_value.data(); + auto* input2_data = input2->data(); + + // FIXME(typhoonzero): use macro fix the below messy code. + switch (op) { + case ScatterOps::ASSIGN: + INLINE_FOR2(in1_rows.size(), in1_row_numel) + input2_data[in1_rows[i] * in1_row_numel + j] = + in1_data[i * in1_row_numel + j]; + break; + case ScatterOps::ADD: + INLINE_FOR2(in1_rows.size(), in1_row_numel) + input2_data[in1_rows[i] * in1_row_numel + j] += + in1_data[i * in1_row_numel + j]; + break; + case ScatterOps::SUB: + INLINE_FOR2(in1_rows.size(), in1_row_numel) + input2_data[in1_rows[i] * in1_row_numel + j] -= + in1_data[i * in1_row_numel + j]; + break; + case ScatterOps::SUBBY: + INLINE_FOR2(in1_rows.size(), in1_row_numel) + input2_data[in1_rows[i] * in1_row_numel + j] = + in1_data[i * in1_row_numel + j] - + input2_data[in1_rows[i] * in1_row_numel + j]; + break; + case ScatterOps::MUL: + INLINE_FOR2(in1_rows.size(), in1_row_numel) + input2_data[in1_rows[i] * in1_row_numel + j] *= + in1_data[i * in1_row_numel + j]; + break; + case ScatterOps::DIV: + INLINE_FOR2(in1_rows.size(), in1_row_numel) + input2_data[in1_rows[i] * in1_row_numel + j] /= + in1_data[i * in1_row_numel + j]; + break; + case ScatterOps::DIVBY: + INLINE_FOR2(in1_rows.size(), in1_row_numel) + input2_data[in1_rows[i] * in1_row_numel + j] = + in1_data[i * in1_row_numel + j] / + input2_data[in1_rows[i] * in1_row_numel + j]; + break; + } + } +}; + +} // namespace scatter } // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/operators/math/selected_rows_functor.cu b/paddle/operators/math/selected_rows_functor.cu index c44577e00af5f362ae7e168495e496d60d05de95..0ee456f9bc61436bd0f2f8ef20dd1654e7e56d56 100644 --- a/paddle/operators/math/selected_rows_functor.cu +++ b/paddle/operators/math/selected_rows_functor.cu @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include + #include "paddle/operators/math/math_function.h" #include "paddle/operators/math/selected_rows_functor.h" #include "paddle/platform/cuda_helper.h" @@ -58,15 +60,15 @@ struct SelectedRowsAdd { PADDLE_ENFORCE(platform::is_gpu_place(out_place)); memory::Copy( - boost::get(out_place), out_data, - boost::get(in1_place), in1_data, + boost::get(out_place), out_data, + boost::get(in1_place), in1_data, in1_value.numel() * sizeof(T), reinterpret_cast(context).stream()); auto* in2_data = in2_value.data(); - memory::Copy(boost::get(out_place), + memory::Copy(boost::get(out_place), out_data + in1_value.numel(), - boost::get(in2_place), in2_data, + boost::get(in2_place), in2_data, in2_value.numel() * sizeof(T), context.stream()); } }; @@ -160,9 +162,9 @@ struct SelectedRowsAddTo { auto* in1_data = in1_value.data(); auto* in2_data = in2_value->data(); - memory::Copy(boost::get(in2_place), + memory::Copy(boost::get(in2_place), in2_data + input2_offset, - boost::get(in1_place), in1_data, + boost::get(in1_place), in1_data, in1_value.numel() * sizeof(T), context.stream()); } }; @@ -222,6 +224,157 @@ template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; + +namespace scatter { + +template +__global__ void MergeAddKernel(const T* input, const int64_t* input_rows, + T* out, const int64_t* out_rows, + size_t out_rows_size, int64_t row_numel) { + const int ty = blockIdx.y; + int tid = threadIdx.x; + __shared__ size_t out_idx; + + if (tid == 0) { + for (size_t i = 0; i < out_rows_size; i++) { + if (input_rows[ty] == out_rows[i]) { + out_idx = i; + } + } + } + + __syncthreads(); + + input += ty * row_numel; + out += out_idx * row_numel; + for (int index = tid; index < row_numel; index += block_size) { + paddle::platform::CudaAtomicAdd(out + index, input[index]); + } +} + +template +struct MergeAdd { + framework::SelectedRows operator()(const platform::CUDADeviceContext& context, + const framework::SelectedRows& input) { + framework::SelectedRows out; + auto input_rows = input.rows(); + std::set row_set(input_rows.begin(), input_rows.end()); + std::vector merge_rows(row_set.begin(), row_set.end()); + + auto input_width = input.value().dims()[1]; + + out.set_rows(merge_rows); + out.set_height(input.height()); + out.mutable_value()->mutable_data( + framework::make_ddim( + {static_cast(merge_rows.size()), input_width}), + context.GetPlace()); + + math::SetConstant constant_functor; + constant_functor(context, out.mutable_value(), 0.0); + + auto* out_data = out.mutable_value()->data(); + auto* input_data = input.value().data(); + + const int block_size = 256; + dim3 threads(block_size, 1); + dim3 grid1(1, input_rows.size()); + + MergeAddKernel< + T, 256><<(context) + .stream()>>>(input_data, input.rows().data(), out_data, + out.rows().data(), out.rows().size(), + input_width); + return out; + } +}; + +template struct MergeAdd; +template struct MergeAdd; +template struct MergeAdd; +template struct MergeAdd; + +template +__global__ void UpdateToTensorKernel(const T* selected_rows, + const int64_t* rows, const ScatterOps& op, + T* tensor_out, int64_t row_numel) { + const int ty = blockIdx.y; + int tid = threadIdx.x; + + selected_rows += ty * row_numel; + tensor_out += rows[ty] * row_numel; + // FIXME(typhoonzero): use macro fix the below messy code. + switch (op) { + case ScatterOps::ASSIGN: + for (int index = tid; index < row_numel; index += block_size) { + tensor_out[index] = selected_rows[index]; + } + break; + case ScatterOps::ADD: + for (int index = tid; index < row_numel; index += block_size) { + tensor_out[index] += selected_rows[index]; + } + break; + case ScatterOps::SUB: + for (int index = tid; index < row_numel; index += block_size) { + tensor_out[index] -= selected_rows[index]; + } + break; + case ScatterOps::SUBBY: + for (int index = tid; index < row_numel; index += block_size) { + tensor_out[index] = selected_rows[index] - tensor_out[index]; + } + break; + case ScatterOps::MUL: + for (int index = tid; index < row_numel; index += block_size) { + tensor_out[index] *= selected_rows[index]; + } + break; + case ScatterOps::DIV: + for (int index = tid; index < row_numel; index += block_size) { + tensor_out[index] /= selected_rows[index]; + } + break; + case ScatterOps::DIVBY: + for (int index = tid; index < row_numel; index += block_size) { + tensor_out[index] = selected_rows[index] / tensor_out[index]; + } + break; + } +} + +template +struct UpdateToTensor { + void operator()(const platform::CUDADeviceContext& context, + const ScatterOps& op, const framework::SelectedRows& input1, + framework::Tensor* input2) { + // NOTE: Use SelectedRowsAddToTensor for better performance + // no additional MergeAdd called. + MergeAdd merge_func; + auto merged_in1 = merge_func(context, input1); + + auto in1_height = merged_in1.height(); + auto in2_dims = input2->dims(); + PADDLE_ENFORCE_EQ(in1_height, in2_dims[0]); + + auto& in1_value = merged_in1.value(); + auto& in1_rows = merged_in1.rows(); + + int64_t in1_row_numel = in1_value.numel() / in1_rows.size(); + PADDLE_ENFORCE_EQ(in1_row_numel, input2->numel() / in1_height); + + auto* in1_data = in1_value.template data(); + auto* in2_data = input2->data(); + + dim3 threads(platform::PADDLE_CUDA_NUM_THREADS, 1); + dim3 grid(1, in1_rows.size()); + UpdateToTensorKernel<<< + grid, threads, 0, context.stream()>>>(in1_data, in1_rows.data(), op, + in2_data, in1_row_numel); + } +}; +} // namespace scatter } // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/operators/math/selected_rows_functor.h b/paddle/operators/math/selected_rows_functor.h index 1149075abf16547a120ac8928c45b4972409fc72..09d4631905f90f78772368ad71b11826877bdc34 100644 --- a/paddle/operators/math/selected_rows_functor.h +++ b/paddle/operators/math/selected_rows_functor.h @@ -12,9 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include "paddle/framework/eigen.h" #include "paddle/framework/selected_rows.h" #include "paddle/platform/device_context.h" +#define INLINE_FOR2(sizei, sizej) \ + for (int64_t i = 0; i < sizei; i++) \ + for (int64_t j = 0; j < sizej; j++) + namespace paddle { namespace operators { namespace math { @@ -52,6 +57,78 @@ struct SelectedRowsAddToTensor { framework::Tensor* input2); }; +namespace scatter { +// functors for manuplating SelectedRows data +template +struct MergeAdd { + // unary functor, merge by adding duplicated rows in + // the input SelectedRows object. + framework::SelectedRows operator()(const DeviceContext& context, + const framework::SelectedRows& input); +}; + +template +struct Add { + framework::SelectedRows operator()(const DeviceContext& context, + const framework::SelectedRows& input1, + const framework::SelectedRows& input2) { + framework::SelectedRows out; + out.set_rows(input1.rows()); + out.set_height(input1.height()); + out.mutable_value()->mutable_data(input1.value().dims(), + context.GetPlace()); + auto e_out = framework::EigenVector::Flatten(*(out.mutable_value())); + auto e_in1 = framework::EigenVector::Flatten(input1.value()); + auto e_in2 = framework::EigenVector::Flatten(input2.value()); + e_out.device(*context.eigen_device()) = e_in1 + e_in2; + return out; + } +}; + +template +struct Mul { + // multiply two SelectedRows + framework::SelectedRows operator()(const DeviceContext& context, + const framework::SelectedRows& input1, + const framework::SelectedRows& input2) { + framework::SelectedRows out; + out.set_rows(input1.rows()); + out.set_height(input1.height()); + out.mutable_value()->mutable_data(input1.value().dims(), + context.GetPlace()); + auto e_out = framework::EigenVector::Flatten(*(out.mutable_value())); + auto e_in1 = framework::EigenVector::Flatten(input1.value()); + auto e_in2 = framework::EigenVector::Flatten(input2.value()); + e_out.device(*context.eigen_device()) = e_in1 * e_in2; + return out; + } + // multiply scalar to SelectedRows + framework::SelectedRows operator()(const DeviceContext& context, + const framework::SelectedRows& input1, + const T input2) { + framework::SelectedRows out; + out.set_rows(input1.rows()); + out.set_height(input1.height()); + out.mutable_value()->mutable_data(input1.value().dims(), + context.GetPlace()); + auto e_out = framework::EigenVector::Flatten(*(out.mutable_value())); + auto e_in1 = framework::EigenVector::Flatten(input1.value()); + e_out.device(*context.eigen_device()) = input2 * e_in1; + return out; + } +}; + +enum class ScatterOps { ASSIGN, ADD, SUB, SUBBY, MUL, DIV, DIVBY }; + +// out = seleted_rows_in / tensor +template +struct UpdateToTensor { + void operator()(const DeviceContext& context, const ScatterOps& op, + const framework::SelectedRows& input1, + framework::Tensor* input2); +}; + +} // namespace scatter } // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/operators/math/selected_rows_functor_test.cu b/paddle/operators/math/selected_rows_functor_test.cu index 777caf5635647d11e8fde05a68fdf7e2c32f48df..0a2e36f68acee04bd6b272d37679c18231cb8760 100644 --- a/paddle/operators/math/selected_rows_functor_test.cu +++ b/paddle/operators/math/selected_rows_functor_test.cu @@ -21,7 +21,7 @@ TEST(selected_rows_functor, gpu_add) { using namespace paddle::platform; using namespace paddle::operators::math; - GPUPlace gpu_place(0); + CUDAPlace gpu_place(0); CPUPlace cpu_place; CUDADeviceContext ctx(gpu_place); SetConstant functor; @@ -119,7 +119,7 @@ TEST(selected_rows_functor, gpu_add_to) { using namespace paddle::platform; using namespace paddle::operators::math; - GPUPlace gpu_place(0); + CUDAPlace gpu_place(0); CPUPlace cpu_place; CUDADeviceContext ctx(gpu_place); SetConstant functor; diff --git a/paddle/operators/math/vol2col_test.cc b/paddle/operators/math/vol2col_test.cc index f46db3c56713399798a45854bf1613d07aee26e6..3794f0e52d200a08253a979991da04ec564cae47 100644 --- a/paddle/operators/math/vol2col_test.cc +++ b/paddle/operators/math/vol2col_test.cc @@ -122,6 +122,6 @@ TEST(math, vol2col) { testVol2col(); #ifdef PADDLE_WITH_CUDA testVol2col(); + paddle::platform::CUDAPlace>(); #endif // PADDLE_WITH_CUDA } diff --git a/paddle/operators/matmul_op.cu.cc b/paddle/operators/matmul_op.cu.cc index 6a3772c00457993dcc7b55a0f15493974633026c..d28d12164e493786c5bdafb157795d797ee87b91 100644 --- a/paddle/operators/matmul_op.cu.cc +++ b/paddle/operators/matmul_op.cu.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/matmul_op.h" diff --git a/paddle/operators/matmul_op.h b/paddle/operators/matmul_op.h index de9da487b3d627cc79962db3770632813e9cd9f5..78adc64f76f45afce64c49bcf734647e0db2d6b3 100644 --- a/paddle/operators/matmul_op.h +++ b/paddle/operators/matmul_op.h @@ -1,16 +1,16 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - You may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/max_sequence_len_op.cc b/paddle/operators/max_sequence_len_op.cc index dec2874a1fd13c1379e37d7b9755d465ffb1a6f7..019150e4914e8bd34a5e8b7d37318aee43942fcc 100644 --- a/paddle/operators/max_sequence_len_op.cc +++ b/paddle/operators/max_sequence_len_op.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/lod_rank_table.h" #include "paddle/framework/op_registry.h" @@ -28,7 +28,7 @@ class MaxSeqenceLenOp : public framework::OperatorBase { : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &dev_place) const override { auto &rank_table = scope.FindVar(Input("RankTable"))->Get(); auto *out = diff --git a/paddle/operators/maxout_op.cu.cc b/paddle/operators/maxout_op.cu.cc index 2904f0ff96f06cefad29a65898cd82107d9bd600..c4a2d676d3aca4d59d0bfa8c75aa0c249e202ab5 100644 --- a/paddle/operators/maxout_op.cu.cc +++ b/paddle/operators/maxout_op.cu.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/maxout_op.h" diff --git a/paddle/operators/mean_op.cu b/paddle/operators/mean_op.cu index 93062bf540ad64350f7ee9a554c3c469aba46677..212d4481138c1478f6e3aa684008f9e42c5a3870 100644 --- a/paddle/operators/mean_op.cu +++ b/paddle/operators/mean_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU diff --git a/paddle/operators/merge_lod_tensor_op.cc b/paddle/operators/merge_lod_tensor_op.cc index 5edf29c3af958f5a939fdb830d46aca4b8d3dbe0..3f999e404f8afe6bded09c820509fa0f36d30bf6 100644 --- a/paddle/operators/merge_lod_tensor_op.cc +++ b/paddle/operators/merge_lod_tensor_op.cc @@ -28,7 +28,11 @@ class MergeLoDTensorOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &dev_place) const override { + // get device context from pool + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(dev_place); + auto &x = scope.FindVar(Input("X"))->Get(); auto &mask = scope.FindVar(Input("Mask"))->Get(); auto &in_true = scope.FindVar(Input("InTrue"))->Get(); diff --git a/paddle/operators/minus_op.cc b/paddle/operators/minus_op.cc index 2e9cc9d29d8c92ac56b451834f930758216e6a44..3d7742dd4bc2a3c727279bc1e6c7dd47b96eefa3 100644 --- a/paddle/operators/minus_op.cc +++ b/paddle/operators/minus_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/minus_op.h" #include "paddle/operators/net_op.h" diff --git a/paddle/operators/minus_op.cu b/paddle/operators/minus_op.cu index 3b202ea92ee8692f2441909083f559adff5fea8c..80cd9f7c16845904b7b46ae1597ce9558c32f46a 100644 --- a/paddle/operators/minus_op.cu +++ b/paddle/operators/minus_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/minus_op.h" diff --git a/paddle/operators/minus_op.h b/paddle/operators/minus_op.h index 78e1e1be6d622d504db9e664dcb5f35ca0c22b95..20760b8cd5bd2f74ed8469addda8f67f11f4545c 100644 --- a/paddle/operators/minus_op.h +++ b/paddle/operators/minus_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" diff --git a/paddle/operators/modified_huber_loss_op.cc b/paddle/operators/modified_huber_loss_op.cc index dbb28f8466b141502fbba8ae5d8a511a6b1d74c3..f5d69071a86e3f8037840c091cf5b7683e4eeb96 100644 --- a/paddle/operators/modified_huber_loss_op.cc +++ b/paddle/operators/modified_huber_loss_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/modified_huber_loss_op.h" diff --git a/paddle/operators/modified_huber_loss_op.cu b/paddle/operators/modified_huber_loss_op.cu index 40a8447da4d9d4874af232f3408557c950b58482..3d2a5562e8cc2117b0b460496d9ba8e96823fbfb 100644 --- a/paddle/operators/modified_huber_loss_op.cu +++ b/paddle/operators/modified_huber_loss_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ -#include -#include +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include #include #include "paddle/framework/op_registry.h" diff --git a/paddle/operators/modified_huber_loss_op.h b/paddle/operators/modified_huber_loss_op.h index 157ae0682e0cf4392dab003153d44f48209d00a1..6ce86feee574efca8811f316f47f1c3fbbdd0bf9 100644 --- a/paddle/operators/modified_huber_loss_op.h +++ b/paddle/operators/modified_huber_loss_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/momentum_op.cu b/paddle/operators/momentum_op.cu index 00f1253465d336e0fad580d0c6b898369e4783ca..2b9314162e6f10b3791c913203c732d2822861ab 100644 --- a/paddle/operators/momentum_op.cu +++ b/paddle/operators/momentum_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/op_registry.h" diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc index 599df9c3df58db6444d7cb729e1a2c1f9f628b5b..c923e988a55b43ebb7ba6256e7b72a85c124f360 100644 --- a/paddle/operators/mul_op.cc +++ b/paddle/operators/mul_op.cc @@ -113,7 +113,7 @@ This operator is used to perform matrix multiplication for input $X$ and $Y$. The equation is: - $$Out = X * Y$$ +$$Out = X * Y$$ Both the input $X$ and $Y$ can carry the LoD (Level of Details) information, or not. But the output only shares the LoD information with input $X$. diff --git a/paddle/operators/mul_op.cu.cc b/paddle/operators/mul_op.cu.cc index 6095de58d0c58be6b647771e9784348cbf8c4ad4..43de9a719499e4e0e8fd2e5fcc6771d717ce6522 100644 --- a/paddle/operators/mul_op.cu.cc +++ b/paddle/operators/mul_op.cu.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/mul_op.h" diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h index 1b467dca8302c10fe08a157aac4586230e096dd0..1fb0569b49cce80c3f1e408fb57b5f5cf7033a27 100644 --- a/paddle/operators/mul_op.h +++ b/paddle/operators/mul_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - You may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/multiplex_op.cc b/paddle/operators/multiplex_op.cc index f524de60dbb3c652aa2a74478af6c0e38fb3cb43..11e047b5d57b6bc18e6d6f4a1d122e18dfc6e357 100644 --- a/paddle/operators/multiplex_op.cc +++ b/paddle/operators/multiplex_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/multiplex_op.h" @@ -51,7 +51,7 @@ class MultiplexOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.MultiInput("X")[0]->type()), @@ -102,7 +102,7 @@ class MultiplexGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.MultiInput("X")[0]->type()), diff --git a/paddle/operators/multiplex_op.cu b/paddle/operators/multiplex_op.cu index 47986e9ff86f2e08b0861cde35ac3a44b10caed1..f49ee71f104b72f5c8ea5fb1d49999528c21832e 100644 --- a/paddle/operators/multiplex_op.cu +++ b/paddle/operators/multiplex_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/op_registry.h" #include "paddle/operators/multiplex_op.h" @@ -36,7 +36,7 @@ class MultiplexGPUKernel : public framework::OpKernel { CopyFrom(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu); auto* index = index_t_cpu.data(); auto stream = ctx.cuda_device_context().stream(); - platform::GPUPlace place = boost::get(ctx.GetPlace()); + platform::CUDAPlace place = boost::get(ctx.GetPlace()); for (auto i = 0; i < rows; i++) { int32_t k = index[i]; PADDLE_ENFORCE_GE(k, 0, "index must be nonnegative."); @@ -73,7 +73,7 @@ class MultiplexGradGPUKernel : public framework::OpKernel { auto* index = index_t_cpu.data(); auto stream = ctx.cuda_device_context().stream(); - platform::GPUPlace place = boost::get(ctx.GetPlace()); + platform::CUDAPlace place = boost::get(ctx.GetPlace()); for (auto i = 0; i < rows; i++) { size_t k = static_cast(index[i]); if (d_ins[k]) { diff --git a/paddle/operators/multiplex_op.h b/paddle/operators/multiplex_op.h index 344315116122f7ad843af740be8a31313c8a0342..ef66be5556ee613a037de13286ecc66b53885c1f 100644 --- a/paddle/operators/multiplex_op.h +++ b/paddle/operators/multiplex_op.h @@ -1,17 +1,16 @@ - /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/nccl/nccl_gpu_common.cc b/paddle/operators/nccl/nccl_gpu_common.cc index 6be735e4c731f79684e0bdac3d69a30b328fed84..1602a3d9b54dd64813770a7162f8d4f3dd0e791a 100644 --- a/paddle/operators/nccl/nccl_gpu_common.cc +++ b/paddle/operators/nccl/nccl_gpu_common.cc @@ -1,13 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/nccl/nccl_gpu_common.h" #include "paddle/platform/gpu_info.h" diff --git a/paddle/operators/nccl/nccl_gpu_common.h b/paddle/operators/nccl/nccl_gpu_common.h index 48e322f99398a7f1d6af9cab653d0cc92d981fe0..5173996f2020ec7a94643277e8c7a532d41d9045 100644 --- a/paddle/operators/nccl/nccl_gpu_common.h +++ b/paddle/operators/nccl/nccl_gpu_common.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/nccl_op.cc b/paddle/operators/nccl_op.cc index e19f534f8a2d05cd9b569a0eebb287db3d3321ba..9d51153b0631b988c9297f395672be67e18ee3f9 100644 --- a/paddle/operators/nccl_op.cc +++ b/paddle/operators/nccl_op.cc @@ -1,13 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/op_registry.h" #include "paddle/operators/nccl/nccl_gpu_common.h" @@ -24,7 +27,7 @@ class NCCLInitOp : public framework::OperatorBase { : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { const auto &name = Output("Communicator"); PADDLE_ENFORCE_NOT_NULL(scope.FindVar(name), "Can not find variable '%s' in the scope.", name); diff --git a/paddle/operators/nccl_op.cu.cc b/paddle/operators/nccl_op.cu.cc index 6ca6db7253da0e59c742f115cd25a1b8203a3044..1b986a13650de7d77f4828d71798ee00d61c1284 100644 --- a/paddle/operators/nccl_op.cu.cc +++ b/paddle/operators/nccl_op.cu.cc @@ -67,7 +67,7 @@ class NCCLAllReduceKernel : public framework::OpKernel { auto stream = ctx.cuda_device_context().stream(); // device id - int gpu_id = boost::get(ctx.GetPlace()).GetDeviceId(); + int gpu_id = boost::get(ctx.GetPlace()).GetDeviceId(); int idx = comm->GetCommId(gpu_id); for (size_t i = 0; i < ins.size(); ++i) { @@ -120,7 +120,7 @@ class NCCLReduceKernel : public framework::OpKernel { ctx.device_context()) .stream(); // device id - int gpu_id = boost::get(ctx.GetPlace()).GetDeviceId(); + int gpu_id = boost::get(ctx.GetPlace()).GetDeviceId(); int idx = comm->GetCommId(gpu_id); auto ins_names = ctx.Inputs("X"); @@ -164,7 +164,7 @@ class NCCLBcastKernel : public framework::OpKernel { ctx.device_context()) .stream(); // device id - int gpu_id = boost::get(ctx.GetPlace()).GetDeviceId(); + int gpu_id = boost::get(ctx.GetPlace()).GetDeviceId(); int idx = comm->GetCommId(gpu_id); if (idx == root) { diff --git a/paddle/operators/nccl_op_test.cu.cc b/paddle/operators/nccl_op_test.cu.cc index c1046aadafbde303a3a8b12f2377018396b9adb8..6546096069d4c3fbc4908a16c2dba2ac6d7e6421 100644 --- a/paddle/operators/nccl_op_test.cu.cc +++ b/paddle/operators/nccl_op_test.cu.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include #include @@ -22,6 +22,7 @@ #include #include "paddle/framework/block_desc.h" +#include "paddle/framework/init.h" #include "paddle/framework/op_desc.h" #include "paddle/framework/op_registry.h" #include "paddle/framework/program_desc.h" @@ -49,9 +50,9 @@ const f::DDim kDims = {100, 100}; class NCCLTester : public ::testing::Test { public: virtual void SetUp() override { - cpu_ctx = new p::CPUDeviceContext(p::CPUPlace()); + paddle::platform::CPUPlace cpu_place; for (size_t i = 0; i < gpu_list.size(); ++i) { - p::GPUPlace place(i); + p::CUDAPlace place(i); dev_ctxs.emplace_back(new p::CUDADeviceContext(place)); } @@ -65,6 +66,7 @@ class NCCLTester : public ::testing::Test { } void NCCLInitOp() { + paddle::platform::CPUPlace cpu_place; std::unique_ptr op1(new f::OpDesc); op1->SetType("ncclInit"); @@ -76,7 +78,7 @@ class NCCLTester : public ::testing::Test { auto op = f::OpRegistry::CreateOp(*op1); VLOG(1) << "invoke NCCLInitOp."; - op->Run(g_scope, *cpu_ctx); + op->Run(g_scope, cpu_place); VLOG(1) << "NCCLInitOp finished."; } @@ -85,7 +87,7 @@ class NCCLTester : public ::testing::Test { std::unique_lock lk(mu); const f::OpDesc *op1 = &op_desc; - p::GPUPlace place(gpu_id); + p::CUDAPlace place(gpu_id); auto &ctx = dev_ctxs.at(gpu_id); auto *send_tensor = scope->Var("st")->GetMutable(); @@ -111,13 +113,12 @@ class NCCLTester : public ::testing::Test { VLOG(1) << "Device : " << gpu_id << " invoke " << op_desc.Type(); VLOG(1) << " send_tensor : " << send_tensor->numel() << " recv_tensor : " << recv_tensor->numel(); - op->Run(*scope, *ctx); + op->Run(*scope, place); VLOG(1) << "Device : " << gpu_id << " finished " << op_desc.Type(); } public: std::vector dev_ctxs; - p::DeviceContext *cpu_ctx; f::Scope g_scope; std::mutex mu; }; @@ -131,14 +132,14 @@ TEST(NCCL, ncclInitOp) { op_desc->SetAttr("gpus", {gpu_list}); f::Scope g_scope; - std::unique_ptr ctx(new p::CPUDeviceContext(p::CPUPlace())); + paddle::platform::CPUPlace cpu_place; auto *var = g_scope.Var("x1"); var->GetMutable(); auto op = f::OpRegistry::CreateOp(*op_desc); VLOG(1) << "invoke NCCLInitOp."; - op->Run(g_scope, *ctx.get()); + op->Run(g_scope, cpu_place); VLOG(1) << "NCCLInitOp finished."; } @@ -170,7 +171,7 @@ TEST_F(NCCLTester, ncclAllReduceOp) { for (size_t i = 0; i < dev_scopes.size(); ++i) { p::CPUPlace cpu_place; - p::GPUPlace gpu_place(gpu_list[i]); + p::CUDAPlace gpu_place(gpu_list[i]); auto &recv_tensor = dev_scopes[i]->FindVar("rt")->Get(); auto *rt = recv_tensor.data(); @@ -179,7 +180,7 @@ TEST_F(NCCLTester, ncclAllReduceOp) { auto *ct = result_tensor->mutable_data(cpu_place); paddle::memory::Copy( - cpu_place, ct, p::GPUPlace(gpu_list[i]), rt, + cpu_place, ct, p::CUDAPlace(gpu_list[i]), rt, recv_tensor.numel() * sizeof(float), static_cast(dev_ctxs[i])->stream()); @@ -218,7 +219,7 @@ TEST_F(NCCLTester, ncclReduceOp) { float result = std::accumulate(gpu_list.begin(), gpu_list.end(), 0); p::CPUPlace cpu_place; - p::GPUPlace gpu_place(gpu_list[kRoot]); + p::CUDAPlace gpu_place(gpu_list[kRoot]); auto &recv_tensor = dev_scopes[kRoot]->FindVar("rt")->Get(); auto *rt = recv_tensor.data(); @@ -228,7 +229,7 @@ TEST_F(NCCLTester, ncclReduceOp) { auto *ct = result_tensor->mutable_data(cpu_place); paddle::memory::Copy( - cpu_place, ct, p::GPUPlace(gpu_list[kRoot]), rt, + cpu_place, ct, p::CUDAPlace(gpu_list[kRoot]), rt, recv_tensor.numel() * sizeof(float), static_cast(dev_ctxs[kRoot])->stream()); @@ -267,7 +268,7 @@ TEST_F(NCCLTester, ncclBcastOp) { float result = kRoot; p::CPUPlace cpu_place; - p::GPUPlace gpu_place(gpu_list[idx]); + p::CUDAPlace gpu_place(gpu_list[idx]); auto &recv_tensor = dev_scopes[idx]->FindVar("rt")->Get(); auto *rt = recv_tensor.data(); @@ -276,7 +277,7 @@ TEST_F(NCCLTester, ncclBcastOp) { auto *ct = result_tensor->mutable_data(cpu_place); paddle::memory::Copy( - cpu_place, ct, p::GPUPlace(gpu_list[idx]), rt, + cpu_place, ct, p::CUDAPlace(gpu_list[idx]), rt, recv_tensor.numel() * sizeof(float), static_cast(dev_ctxs[idx])->stream()); @@ -294,9 +295,18 @@ int main(int argc, char **argv) { return 0; } - for (int i = 0; i < dev_count; ++i) { + std::vector places; + + places.emplace_back(paddle::platform::CPUPlace()); + int count = paddle::platform::GetCUDADeviceCount(); + for (int i = 0; i < count; ++i) { + places.emplace_back(paddle::platform::CUDAPlace(i)); gpu_list.emplace_back(i); } + + VLOG(0) << " DeviceCount " << count; + paddle::platform::DeviceContextPool::Init(places); + testing::InitGoogleTest(&argc, argv); // device context should be release before scope. diff --git a/paddle/operators/nce_op.cc b/paddle/operators/nce_op.cc index 6dd457f7a2e410b65680004599ab753acbb34f71..d39ca87d53518963f652f7b8c8cb289a6fef70fd 100644 --- a/paddle/operators/nce_op.cc +++ b/paddle/operators/nce_op.cc @@ -63,7 +63,7 @@ class NCEOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Input")->type()), @@ -166,7 +166,7 @@ class NCEOpGrad : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Input")->type()), diff --git a/paddle/operators/nce_op.h b/paddle/operators/nce_op.h index 6636dad06037f163252dc342200a99c756ed2a2e..e6b496f7896dcb412be8ff096fdccb2f0b682369 100644 --- a/paddle/operators/nce_op.h +++ b/paddle/operators/nce_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h index 8935751f15ccc4861c9e06d8d9031c8dff1a4af3..85d0153b32c0ba53bfe0912fc2682c8b635ba172 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -65,9 +65,9 @@ class NetOp : public framework::OperatorBase { * will be used. */ void Run(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const override { + const platform::Place& place) const override { for (auto& op : ops_) { - op->Run(scope, dev_ctx); + op->Run(scope, place); } } diff --git a/paddle/operators/net_op_test.cc b/paddle/operators/net_op_test.cc index 22fba9568d018586b4622884b7d6145fd646adb0..dfd86546e83a6276aedd198eaeb6fad2c50944df 100644 --- a/paddle/operators/net_op_test.cc +++ b/paddle/operators/net_op_test.cc @@ -13,8 +13,7 @@ class TestOp : public framework::OperatorBase { public: using framework::OperatorBase::OperatorBase; DEFINE_OP_CLONE_METHOD(TestOp); - void Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const override { + void Run(const Scope& scope, const platform::Place& place) const override { ++run_cnt; } }; diff --git a/paddle/operators/norm_op.cc b/paddle/operators/norm_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..b198b76cd49ca7c05b047d42df149d2b1e461b8e --- /dev/null +++ b/paddle/operators/norm_op.cc @@ -0,0 +1,95 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +Indicesou may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/norm_op.h" +namespace paddle { +namespace operators { + +template +class NormOpMaker : public framework::OpProtoAndCheckerMaker { + public: + NormOpMaker(OpProto* proto, OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput( + "X", + "(Tensor) The input tensor of norm operator. " + "The format of input tensor is NCHW. Where N is batch size, C is the " + "number of channels, H and W is the height and width of feature."); + AddInput("Scale", + "(Tensor) The input tensor of norm operator. " + "The format of input tensor is C * 1."); + AddAttr("epsilon", + "(float, default 1e-10) Constant " + "for numerical stability.") + .SetDefault(1.0e-10f); + AddOutput("Out", + "(Tensor) The output tensor of norm operator." + "N * M." + "M = C * H * W"); + AddComment(R"DOC( + "Input shape: $(N, C, H, W)$ + Sclae shape: $(C, 1)$ + Output shape: $(N, C, H, W)$ + Where + forward + $$ + [\frac {x_{1}}{\sqrt{\sum{x_{i}^{2}}}} \frac {x_{2}}{\sqrt{\sum{x_{i}^{2}}}} \frac {x_{3}}{\sqrt{\sum{x_{i}^{2}}}} \cdot \cdot \cdot \frac {x_{n}}{\sqrt{\sum{x_{i}^{2}}}}] + $$ + backward + $$ + \frac{\frac{\mathrm{d}L }{\mathrm{d}y_{1}} - \frac {x_{1}\sum {\frac{\mathrm{d} L}{\mathrm{d} y_{j}}}x_{j}}{\sum x_{j}^{2}} }{\sqrt{\sum{x_{j}^{2}}}} + $$ + )DOC"); + } +}; + +class NormOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of NormOp" + "should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Scale"), + "Input(Scale) of NormOp" + "should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of NormOp should not be null."); + auto in_x_dims = ctx->GetInputDim("X"); + ctx->SetOutputDim("Out", in_x_dims); + } +}; + +class NormOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); + PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), + "Input(X@GRAD) should not be null."); + ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(norm, ops::NormOp, ops::NormOpMaker, norm_grad, + ops::NormOpGrad); +REGISTER_OP_CPU_KERNEL( + norm, ops::NormKernel, + ops::NormKernel); +REGISTER_OP_CPU_KERNEL( + norm_grad, ops::NormGradKernel, + ops::NormGradKernel); diff --git a/paddle/operators/norm_op.cu b/paddle/operators/norm_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..2941c89b93141388d3746cf128848960ed6f3625 --- /dev/null +++ b/paddle/operators/norm_op.cu @@ -0,0 +1,24 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +Indicesou may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#define EIGEN_USE_GPU + +#include "paddle/operators/norm_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + norm, ops::NormKernel, + ops::NormKernel); +REGISTER_OP_CUDA_KERNEL( + norm_grad, ops::NormGradKernel, + ops::NormGradKernel); diff --git a/paddle/operators/norm_op.h b/paddle/operators/norm_op.h new file mode 100644 index 0000000000000000000000000000000000000000..7bee48919e9fdf85595bbc7ad540ca45a1dbfe5c --- /dev/null +++ b/paddle/operators/norm_op.h @@ -0,0 +1,175 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +Indicesou may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/op_registry.h" +#include "paddle/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +template +class NormKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + const framework::Tensor* in_x = context.Input("X"); + const framework::Tensor* scale = context.Input("Scale"); + auto* out = context.Output("Out"); + auto epsilon = static_cast(context.Attr("epsilon")); + out->mutable_data(context.GetPlace()); + int batch_size = in_x->dims()[0]; + int channels = in_x->dims()[1]; + int height = in_x->dims()[2]; + int width = in_x->dims()[3]; + int fea_len = height * width; + auto* place = + context.template device_context().eigen_device(); + auto x = + framework::EigenMatrix::From( + *in_x, framework::make_ddim({batch_size, fea_len * channels})); + // get square + framework::Tensor x_square; + x_square.mutable_data(in_x->dims(), context.GetPlace()); + auto x_square_eigen = + framework::EigenMatrix::From( + x_square, framework::make_ddim({batch_size, fea_len * channels})); + x_square_eigen.device(*place) = x.square(); + auto scale_eigen = + framework::EigenVector::Flatten( + *scale); + for (int n = 0; n < batch_size; ++n) { + framework::Tensor in_x_batch = in_x->Slice(n, n + 1); + auto in_x_batch_eigen = + framework::EigenMatrix::From( + in_x_batch, framework::make_ddim({channels, fea_len})); + framework::Tensor x_square_batch = x_square.Slice(n, n + 1); + auto x_square_batch_eigen = + framework::EigenMatrix::From( + x_square_batch, framework::make_ddim({channels, fea_len})); + framework::Tensor out_batch = out->Slice(n, n + 1); + auto out_batch_eigen = + framework::EigenMatrix::From( + out_batch, framework::make_ddim({channels, fea_len})); + framework::Tensor tmp_tensor; + tmp_tensor.mutable_data(framework::make_ddim({1, fea_len}), + context.GetPlace()); + auto tmp = framework::EigenVector::Flatten(tmp_tensor); + // get colsum and sqrt , inverse + auto dim = Eigen::array({{0}}); + tmp.device(*place) = x_square_batch_eigen.sum(dim); + tmp.device(*place) = (tmp + epsilon).sqrt().inverse(); + Eigen::array broadcast_dim_col; + broadcast_dim_col[1] = 1; + broadcast_dim_col[0] = channels; + out_batch_eigen.device(*place) = + in_x_batch_eigen * (tmp.broadcast(broadcast_dim_col)); + Eigen::array broadcast_dim_row; + broadcast_dim_row[1] = fea_len; + broadcast_dim_row[0] = 1; + out_batch_eigen.device(*place) = + out_batch_eigen * (scale_eigen.broadcast(broadcast_dim_row)); + } + } +}; +template +class NormGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + const framework::Tensor* in_x = context.Input("X"); + const framework::Tensor* scale = context.Input("Scale"); + const framework::Tensor* out_grad = + context.Input(framework::GradVarName("Out")); + auto epsilon = static_cast(context.Attr("epsilon")); + framework::Tensor* in_x_grad = + context.Output(framework::GradVarName("X")); + in_x_grad->mutable_data(context.GetPlace()); + int batch_size = in_x->dims()[0]; + int channels = in_x->dims()[1]; + int height = in_x->dims()[2]; + int width = in_x->dims()[3]; + int fea_len = height * width; + auto* place = + context.template device_context().eigen_device(); + + auto scale_eigen = + framework::EigenVector::Flatten( + *scale); + auto x = + framework::EigenMatrix::From( + *in_x, framework::make_ddim({batch_size, fea_len * channels})); + // get square + framework::Tensor x_square; + x_square.mutable_data(in_x->dims(), context.GetPlace()); + auto x_square_eigen = + framework::EigenMatrix::From( + x_square, framework::make_ddim({batch_size, fea_len * channels})); + x_square_eigen.device(*place) = x.square(); + + for (int n = 0; n < batch_size; ++n) { + framework::Tensor in_x_batch = in_x->Slice(n, n + 1); + auto in_x_batch_eigen = + framework::EigenMatrix::From( + in_x_batch, framework::make_ddim({channels, fea_len})); + framework::Tensor in_g_batch = in_x_grad->Slice(n, n + 1); + auto in_g_batch_eigen = + framework::EigenMatrix::From( + in_g_batch, framework::make_ddim({channels, fea_len})); + framework::Tensor x_square_batch = x_square.Slice(n, n + 1); + auto x_square_batch_eigen = + framework::EigenMatrix::From( + x_square_batch, framework::make_ddim({channels, fea_len})); + framework::Tensor outg_batch = out_grad->Slice(n, n + 1); + auto outg_batch_eigen = + framework::EigenMatrix::From( + outg_batch, framework::make_ddim({channels, fea_len})); + + framework::Tensor tmp_tensor; + tmp_tensor.mutable_data(framework::make_ddim({1, fea_len}), + context.GetPlace()); + auto tmp_eigen = + framework::EigenVector::Flatten(tmp_tensor); + auto dim = Eigen::array({{0}}); + tmp_eigen.device(*place) = (in_x_batch_eigen * outg_batch_eigen).sum(dim); + framework::Tensor norm_tmp_tensor; + norm_tmp_tensor.mutable_data(framework::make_ddim({1, fea_len}), + context.GetPlace()); + auto norm_tmp_eigen = + framework::EigenVector::Flatten(norm_tmp_tensor); + norm_tmp_eigen.device(*place) = + (x_square_batch_eigen.sum(dim) + epsilon).sqrt(); + Eigen::array broadcast_dim_col; + broadcast_dim_col[1] = 1; + broadcast_dim_col[0] = channels; + in_g_batch_eigen.device(*place) = + in_x_batch_eigen * tmp_eigen.broadcast(broadcast_dim_col); + in_g_batch_eigen.device(*place) = + in_g_batch_eigen / + (norm_tmp_eigen * norm_tmp_eigen).broadcast(broadcast_dim_col); + in_g_batch_eigen.device(*place) = outg_batch_eigen - in_g_batch_eigen; + // outg_batch_eigen + (in_g_batch_eigen * -1); + in_g_batch_eigen.device(*place) = + in_g_batch_eigen / norm_tmp_eigen.broadcast(broadcast_dim_col); + Eigen::array broadcast_dim_row; + broadcast_dim_row[1] = fea_len; + broadcast_dim_row[0] = 1; + in_g_batch_eigen.device(*place) = + in_g_batch_eigen * (scale_eigen.broadcast(broadcast_dim_row)); + } + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/pad_op.cc b/paddle/operators/pad_op.cc index 40f7a7eed53354fa65373830b0972c0e72ef54da..90c53bd17732aa046beceb3ac0a3b8c0d69994f3 100644 --- a/paddle/operators/pad_op.cc +++ b/paddle/operators/pad_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/pad_op.h" diff --git a/paddle/operators/pad_op.cu b/paddle/operators/pad_op.cu index c309fb625cca203418db2599a59ea0144782efc2..433b5f1112a27b36edbe6d99fcdd4fc8395bc2e8 100644 --- a/paddle/operators/pad_op.cu +++ b/paddle/operators/pad_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/pad_op.h" diff --git a/paddle/operators/pad_op.h b/paddle/operators/pad_op.h index 1b95942af3b3711fcad965cdc3f2d2f99b2f32e8..fdf91a5776620485c38a8b2c5f8b26039e438d0c 100644 --- a/paddle/operators/pad_op.h +++ b/paddle/operators/pad_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/parallel_do_op.cc b/paddle/operators/parallel_do_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..077245cd83b2535b45927c8350ca7bc14c541d21 --- /dev/null +++ b/paddle/operators/parallel_do_op.cc @@ -0,0 +1,293 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include + +#include "paddle/framework/executor.h" +#include "paddle/framework/op_registry.h" +#include "paddle/framework/threadpool.h" + +namespace paddle { +namespace operators { + +static constexpr char kInputs[] = "inputs"; +static constexpr char kParameters[] = "parameters"; +static constexpr char kPlaces[] = "places"; + +static constexpr char kOutputs[] = "outputs"; +static constexpr char kParallelScopes[] = "parallel_scopes"; + +static constexpr char kParallelBlock[] = "sub_block"; + +// using ParallelScopeVar = std::vector; +using LoDTensor = framework::LoDTensor; +using OperatorBase = framework::OperatorBase; + +void SplitTensorAndMoveTensorToScopes( + const framework::Scope &scope, + const std::vector &sub_scopes, + const std::vector &places, + const std::vector &names) { + for (auto &argu : names) { + auto *var = scope.FindVar(argu); + const auto &tensor = var->Get(); + auto lod_tensors = tensor.SplitLoDTensor(places); + + for (auto &lod : lod_tensors) { + VLOG(3) << lod.dims(); + } + + for (size_t i = 0; i < sub_scopes.size(); ++i) { + *sub_scopes[i]->Var(argu)->GetMutable() = lod_tensors[i]; + } + } +} + +class ParallelDoOp : public framework::OperatorBase { + public: + ParallelDoOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + + void Run(const framework::Scope &scope, + const platform::Place &place) const override { + // get device context from pool + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + + auto *block = Attr(kParallelBlock); + auto *program = block->Program(); + + // TODO(tonyyang-svail): get places from input + std::vector places; + places.emplace_back(platform::CPUPlace()); + places.emplace_back(platform::CPUPlace()); + + auto &sub_scopes = *scope.FindVar(Output(kParallelScopes)) + ->GetMutable>(); + for (size_t place_idx = 0; place_idx < places.size(); ++place_idx) { + sub_scopes.push_back(&scope.NewScope()); + } + + SplitTensorAndMoveTensorToScopes(scope, sub_scopes, places, + Inputs(kInputs)); + + std::vector> workers; + workers.reserve(places.size()); + for (size_t place_idx = 0; place_idx < places.size(); ++place_idx) { + VLOG(3) << "Run " << place_idx; + + auto &place = places[place_idx]; + auto *cur_scope = sub_scopes[place_idx]; + + // copy parameter + // some version of boost lacks != for boost::variant + if (!(dev_ctx.GetPlace() == place)) { + PADDLE_THROW("Not Implemented"); + } + + workers.emplace_back(framework::Async([program, cur_scope, place, block] { + framework::Executor executor(place); + executor.Run(*program, cur_scope, block->ID(), + false /*create_local_scope*/); + })); + } + for (auto &worker : workers) { + worker.wait(); + } + + // merge output + for (auto &o_name : Outputs(kOutputs)) { + std::vector lod_tensors; + lod_tensors.reserve(sub_scopes.size()); + for (auto *sub_scope : sub_scopes) { + lod_tensors.emplace_back(&sub_scope->FindVar(o_name)->Get()); + } + + auto *lod_tensor_to_be_merged = + scope.FindVar(o_name)->GetMutable(); + lod_tensor_to_be_merged->MergeLoDTensor(lod_tensors, dev_ctx.GetPlace()); + } + } +}; + +class ParallelDoOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + ParallelDoOpProtoMaker(OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput(kInputs, "").AsDuplicable(); + AddInput(kParameters, "").AsDuplicable(); + AddInput(kPlaces, ""); + AddOutput(kOutputs, "").AsDuplicable(); + AddOutput(kParallelScopes, ""); + AddAttr(kParallelBlock, ""); + AddComment(R"DOC( +ParallelDo Operator. +)DOC"); + } +}; + +class ParallelDoGradOp : public OperatorBase { + public: + ParallelDoGradOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + + void Run(const framework::Scope &scope, + const platform::Place &place) const override { + // // get device context from pool + // platform::DeviceContextPool &pool = + // platform::DeviceContextPool::Instance(); + // auto &dev_ctx = *pool.Get(place); + + auto *block = Attr(kParallelBlock); + auto *program = block->Program(); + + auto &sub_scopes = scope.FindVar(Input(kParallelScopes)) + ->Get>(); + + // TODO(tonyyang-svail): get places from input + std::vector places; + places.emplace_back(platform::CPUPlace()); + places.emplace_back(platform::CPUPlace()); + + // feed output@grad + SplitTensorAndMoveTensorToScopes(scope, sub_scopes, places, + Inputs(framework::GradVarName(kOutputs))); + + for (auto &s : Inputs(framework::GradVarName(kOutputs))) { + VLOG(3) << s; + VLOG(3) << scope.FindVar(s)->Get(); + for (auto *sub_scope : sub_scopes) { + VLOG(3) << sub_scope->FindVar(s)->Get(); + } + } + + // exe run + std::vector> workers; + for (size_t place_idx = 0; place_idx < places.size(); ++place_idx) { + VLOG(3) << "Run " << place_idx; + + auto &place = places[place_idx]; + auto *cur_scope = sub_scopes[place_idx]; + + // execute + workers.emplace_back(framework::Async([program, cur_scope, place, block] { + framework::Executor executor(place); + executor.Run(*program, cur_scope, block->ID(), + false /*create_local_scope*/); + })); + } + for (auto &worker : workers) { + worker.wait(); + } + + // merge grad + for (auto &s : Outputs(framework::GradVarName(kParameters))) { + VLOG(3) << s; + + auto &t = sub_scopes[0]->FindVar(s)->Get(); + VLOG(3) << t; + + std::string s_buf = s + "@BUF"; + auto *t_buf = sub_scopes[0]->Var(s_buf)->GetMutable(); + + for (size_t place_idx = 1; place_idx < places.size(); ++place_idx) { + auto &tt = sub_scopes[place_idx]->FindVar(s)->Get(); + VLOG(3) << place_idx; + VLOG(3) << tt; + framework::CopyFrom(tt, places[0], t_buf); + + auto sum_op = framework::OpRegistry::CreateOp( + "sum", {{"X", {s, s_buf}}}, {{"Out", {s}}}, + framework::AttributeMap{}); + sum_op->Run(*sub_scopes[0], place); + } + + VLOG(3) << t; + framework::CopyFrom(t, place, scope.FindVar(s)->GetMutable()); + } + } +}; + +class ParallelDoGradOpDescMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + virtual std::unique_ptr Apply() const { + auto *grad = new framework::OpDesc(); + grad->SetType("parallel_do_grad"); + for (auto &input_param : this->InputNames()) { + VLOG(3) << input_param; + grad->SetInput(input_param, this->Input(input_param)); + grad->SetOutput(framework::GradVarName(input_param), + this->InputGrad(input_param, false)); + } + + for (auto &output_param : this->OutputNames()) { + if (output_param == kParallelScopes) { + grad->SetInput(output_param, this->Output(output_param)); + grad->SetInput(framework::GradVarName(output_param), + this->Output(output_param)); + } else { + grad->SetInput(output_param, this->Output(output_param)); + grad->SetInput(framework::GradVarName(output_param), + this->OutputGrad(output_param)); + } + } + grad->SetAttrMap(this->Attrs()); + grad->SetBlockAttr(kParallelBlock, *grad_block_[0]); + + return std::unique_ptr(grad); + } +}; + +class ParallelDoGradOpShapeInference : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *ctx) const override { + std::vector input{kParameters, kInputs}; + std::vector output{kOutputs}; + for (auto &s : input) { + PADDLE_ENFORCE(ctx->HasInputs(s)); + PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName(s)), + "Cannot find the gradient variable %s", + framework::GradVarName(s)); + } + for (auto &s : output) { + PADDLE_ENFORCE(ctx->HasInputs(s)); + } + for (auto &s : input) { + ctx->SetOutputsDim(framework::GradVarName(s), ctx->GetInputsDim(s)); + } + if (ctx->HasInputs(kParameters)) { + PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName(kParameters))); + ctx->SetOutputsDim(framework::GradVarName(kParameters), + ctx->GetInputsDim(kParameters)); + } + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OPERATOR(parallel_do, paddle::operators::ParallelDoOp, + paddle::operators::ParallelDoOpProtoMaker, + paddle::operators::ParallelDoGradOpDescMaker); +REGISTER_OPERATOR(parallel_do_grad, paddle::operators::ParallelDoGradOp, + paddle::operators::ParallelDoGradOpShapeInference); diff --git a/paddle/operators/pool_cudnn_op.cu.cc b/paddle/operators/pool_cudnn_op.cu.cc index fc2b37bd0fbac82005e709779b2939843b839596..2d0001ba1184c99d9fc642f60c97ba89cec97ccd 100644 --- a/paddle/operators/pool_cudnn_op.cu.cc +++ b/paddle/operators/pool_cudnn_op.cu.cc @@ -29,7 +29,7 @@ class PoolCudnnOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); const Tensor *input = ctx.Input("X"); Tensor *output = ctx.Output("Out"); @@ -90,7 +90,7 @@ class PoolCudnnGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); const Tensor *input = ctx.Input("X"); const Tensor *output = ctx.Input("Out"); diff --git a/paddle/operators/pool_op.cc b/paddle/operators/pool_op.cc index 50057eb6483e9c9e745bc07dee26a0bbbbb5a48c..d3cf5fa638c53dfdfacec153211f447a1e2fa3bf 100644 --- a/paddle/operators/pool_op.cc +++ b/paddle/operators/pool_op.cc @@ -58,6 +58,7 @@ void PoolOp::InferShape(framework::InferShapeContext *ctx) const { OutputSizePool(in_x_dims[i + 2], ksize[i], paddings[i], strides[i])); } ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); + ctx->ShareLoD("X", "Out"); } void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const { diff --git a/paddle/operators/pool_with_index_op.cc b/paddle/operators/pool_with_index_op.cc index 980e9dc08b2ac160e6e06dfb11ff8f3e1279be46..76c5123527c3ff5b7e6c7eec39f4eb1d612759d3 100644 --- a/paddle/operators/pool_with_index_op.cc +++ b/paddle/operators/pool_with_index_op.cc @@ -69,7 +69,7 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), @@ -90,7 +90,7 @@ class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), diff --git a/paddle/operators/positive_negative_pair_op.cc b/paddle/operators/positive_negative_pair_op.cc index ab9f67bfe6b3d6f59b35a57cb8135e9c6d00636e..a6b23c995b8b9104f2da9d3d29ceb3eb88e7da63 100644 --- a/paddle/operators/positive_negative_pair_op.cc +++ b/paddle/operators/positive_negative_pair_op.cc @@ -85,7 +85,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Score")->type()), @@ -154,13 +154,14 @@ class PositiveNegativePairOpMaker : public framework::OpProtoAndCheckerMaker { "Noting that reducing on the first dim will make the LoD info lost.") .SetDefault(0); AddComment(R"DOC( - PositiveNegativePairOp can be used to evaluate Learning To Rank(LTR) - model performance. - Within some context, e.g. the "query", a LTR model generates scores - for a list of items, which gives a partial order of the items. - PositiveNegativePairOp takes a list of reference rank order - (Input("Label")) and the model generated scores (Input(Score)) as - inputs and counts the pairs that ranked correctly and incorrectly. +PositiveNegativePairOp can be used to evaluate Learning To Rank(LTR) model's +performance. + +Within some context, e.g. the "query", a LTR model generates scores for a list +of items, which gives a partial order of the items. PositiveNegativePairOp +takes a list of reference rank order (Input("Label")) and the model generated +scores (Input(Score)) as inputs and counts the pairs that ranked correctly +and incorrectly. )DOC"); } }; diff --git a/paddle/operators/precision_recall_op.cc b/paddle/operators/precision_recall_op.cc index 21dcd28c67bb5eb1d3af0ac8ba16f1d5df1958a8..c5753147effd17c012683e1058e34af46288f366 100644 --- a/paddle/operators/precision_recall_op.cc +++ b/paddle/operators/precision_recall_op.cc @@ -80,7 +80,7 @@ class PrecisionRecallOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("MaxProbs")->type()), diff --git a/paddle/operators/prelu_op.cc b/paddle/operators/prelu_op.cc index 4af8f85277ddb2262aa534f8d81be30449ccf8da..ddc21a657024dcc800726475fa6242f8e6576ad1 100644 --- a/paddle/operators/prelu_op.cc +++ b/paddle/operators/prelu_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/prelu_op.h" #include "paddle/operators/net_op.h" diff --git a/paddle/operators/prelu_op.cu b/paddle/operators/prelu_op.cu index 12033dee0e1c190b08080023d6746fcad48db2fd..1718bb5cd65f48eba391023e4374a30e405a164d 100644 --- a/paddle/operators/prelu_op.cu +++ b/paddle/operators/prelu_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/prelu_op.h" diff --git a/paddle/operators/rank_loss_op.cc b/paddle/operators/rank_loss_op.cc index b5a9949d236bfa6910227092f0682a599543a425..f2164a0f80519ed9c2490ab3aa6809dc84c6070d 100644 --- a/paddle/operators/rank_loss_op.cc +++ b/paddle/operators/rank_loss_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/rank_loss_op.h" diff --git a/paddle/operators/rank_loss_op.cu b/paddle/operators/rank_loss_op.cu index 5aee66443d60c8e20625880ba2ec9606b8a007a0..294b22738347b17dd67df05291ac496bfb608323 100644 --- a/paddle/operators/rank_loss_op.cu +++ b/paddle/operators/rank_loss_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/rank_loss_op.h" diff --git a/paddle/operators/rank_loss_op.h b/paddle/operators/rank_loss_op.h index ea24b61fd94b57950e79b7c1ddb13fa165953538..bd0c49ca6e42bcb1a25c53421c0e672cecbb3a15 100644 --- a/paddle/operators/rank_loss_op.h +++ b/paddle/operators/rank_loss_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 5981d5745d24e0b2fe68bf8b9852cb8a6094885f..056fa46949cd623845956521b068109085a8795e 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include #include "paddle/framework/executor.h" @@ -227,14 +227,15 @@ class RecurrentOp : public RecurrentBase { : RecurrentBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { auto seq_len = static_cast(this->GetSequenceLength(scope)); VLOG(3) << "Static RNN input sequence length = " << seq_len; StepScopes scopes = CreateStepScopes(scope, seq_len); auto reverse = Attr(kReverse); - framework::Executor executor(dev_ctx); + framework::Executor executor(place); auto *block = Attr(kStepBlock); + auto *program = block->Program(); for (size_t i = 0; i < seq_len; ++i) { @@ -270,6 +271,11 @@ class RecurrentOp : public RecurrentBase { executor.Run(*program, &cur_scope, block->ID(), false /*create_local_scope*/); + // get device context from pool + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + // Copy inside::output -> outside::output // outside::output[seq_offset: seq_offset + 1] = inside::output this->LinkTensorWithCallback( @@ -278,14 +284,13 @@ class RecurrentOp : public RecurrentBase { framework::LoDTensor *dst_tensor) { if (i == 0) { // create output tensor at begin dst_tensor->Resize(PrependDims(seq_len, src_tensor.dims())); - dst_tensor->mutable_data(dev_ctx.GetPlace(), src_tensor.type()); + dst_tensor->mutable_data(place, src_tensor.type()); } auto dst_out = dst_tensor->Slice(seq_offset, seq_offset + 1); // Explicit copy output since the local RNN scope can be destroyed // early. - framework::CopyFrom(src_tensor, dev_ctx.GetPlace(), dev_ctx, - &dst_out); + framework::CopyFrom(src_tensor, place, dev_ctx, &dst_out); }); scopes.Next(); @@ -311,15 +316,20 @@ class RecurrentGradOp : public RecurrentBase { : RecurrentBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { auto seq_len = static_cast(GetSequenceLength(scope)); StepScopes scopes = CreateStepScopes(scope, seq_len); auto reverse = Attr(kReverse); - framework::Executor executor(dev_ctx); + framework::Executor executor(place); auto *block = Attr(kStepBlock); + auto *program = block->Program(); + // get device context from pool + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + for (size_t step_id = 0; step_id < seq_len; ++step_id) { size_t seq_offset = reverse ? step_id : seq_len - step_id - 1; VLOG(3) << "Recurrent backward operate at the time step " << seq_offset; @@ -366,8 +376,7 @@ class RecurrentGradOp : public RecurrentBase { auto *cur_grad_var = cur_scope.Var(cur_grad); auto cur_grad_tensor = cur_grad_var->GetMutable(); - framework::CopyFrom(ex_tensor, dev_ctx.GetPlace(), dev_ctx, - cur_grad_tensor); + framework::CopyFrom(ex_tensor, place, dev_ctx, cur_grad_tensor); } } @@ -410,7 +419,7 @@ class RecurrentGradOp : public RecurrentBase { auto zero_op = framework::OpRegistry::CreateOp( "fill_constant", framework::VariableNameMap{}, {{"Out", {pg_names[param_id]}}}, attrs); - zero_op->Run(scope, dev_ctx); + zero_op->Run(scope, place); } auto new_inside_name = cur_scope.Rename(inside_grad_name); @@ -419,7 +428,7 @@ class RecurrentGradOp : public RecurrentBase { auto sum_op = framework::OpRegistry::CreateOp( "sum", {{"X", {pg_names[param_id], new_inside_name}}}, {{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); - sum_op->Run(cur_scope, dev_ctx); + sum_op->Run(cur_scope, place); cur_scope.Rename(new_inside_name, inside_grad_name); } @@ -437,11 +446,11 @@ class RecurrentGradOp : public RecurrentBase { } if (step_id == 0) { // alloc memory outside->Resize(PrependDims(seq_len, inside.dims())); - outside->mutable_data(dev_ctx.GetPlace(), inside.type()); + outside->mutable_data(place, inside.type()); } auto dst = outside->Slice(seq_offset, seq_offset + 1); - framework::CopyFrom(inside, dev_ctx.GetPlace(), dev_ctx, &dst); + framework::CopyFrom(inside, place, dev_ctx, &dst); }); VLOG(5) << "Link outside gradient finished "; @@ -453,8 +462,8 @@ class RecurrentGradOp : public RecurrentBase { [&](const framework::LoDTensor &inside, framework::LoDTensor *outside) { outside->Resize(inside.dims()); - outside->mutable_data(dev_ctx.GetPlace(), inside.type()); - framework::CopyFrom(inside, dev_ctx.GetPlace(), dev_ctx, outside); + outside->mutable_data(place, inside.type()); + framework::CopyFrom(inside, place, dev_ctx, outside); }); VLOG(5) << "Link initialize state gradient finished "; } @@ -483,7 +492,7 @@ class RecurrentGradOp : public RecurrentBase { std::unordered_set LocalVarNames( const framework::Scope &scope) const { - return this->List2Set(scope.GetAllNames(false)); + return this->List2Set(scope.LocalVarNames()); } static std::vector GradVarLists( const std::vector &var_names) { diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc index 4e91d1151ebf7a0cd520f2f1fa58a0cc4a0d1bef..82fceb3da7d396bcfc1d95baccc4ee36b87f4d39 100644 --- a/paddle/operators/recv_op.cc +++ b/paddle/operators/recv_op.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include #include @@ -28,6 +28,8 @@ #include "paddle/operators/detail/send_recv_impl.h" #include "paddle/operators/detail/simple_block_queue.h" +#define LISTEN_TERMINATE_MESSAGE "TERMINATE@RECV" + namespace paddle { namespace operators { @@ -39,7 +41,7 @@ void RunServer(Server **rpc_server, builder.RegisterService(service.get()); std::unique_ptr server(builder.BuildAndStart()); *rpc_server = server.get(); - LOG(INFO) << "Server listening on " << server_address << std::endl; + LOG(INFO) << "Server listening on " << server_address; server->Wait(); } @@ -57,7 +59,10 @@ class RecvOp : public framework::OperatorBase { } } - virtual ~RecvOp() { + void Stop() override { + detail::MessageWithName term_msg; + term_msg.first = LISTEN_TERMINATE_MESSAGE; + rpc_service_->Push(term_msg); rpc_server_->Shutdown(); server_thread_->join(); } @@ -73,7 +78,7 @@ class RecvOp : public framework::OperatorBase { } void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &dev_place) const override { // FIXME(typhoonzero): no new scopes for every run. framework::Scope &recv_scope = scope.NewScope(); rpc_service_->SetScope(&recv_scope); @@ -83,13 +88,18 @@ class RecvOp : public framework::OperatorBase { size_t param_count = param_list.size(); rpc_service_->Reset(); // TODO(typhoonzero): change this to a while_op for every cluster-batch. - while (true) { + bool exit_flag = false; + while (!exit_flag) { // Get from multiple trainers, we don't care about order in which // the gradient arrives, just add suffix 0~n then average the gradient. for (size_t i = 0; i < param_count * trainer_count; ++i) { // blocking get one var from client. - const detail::TensorWithName &v = rpc_service_->Get(); + const detail::MessageWithName &v = rpc_service_->Get(); auto grad_var_name = v.first; + if (grad_var_name == LISTEN_TERMINATE_MESSAGE) { + exit_flag = true; + break; + } auto it = std::find(grad_list.begin(), grad_list.end(), grad_var_name); std::string param_var_name; if (it != grad_list.end()) { @@ -111,9 +121,13 @@ class RecvOp : public framework::OperatorBase { } auto *var = recv_scope.Var(grad_var_name); - auto *tensor = var->GetMutable(); - // FIXME(typhoonzero): do not copy - framework::CopyFrom(v.second, dev_ctx.GetPlace(), dev_ctx, tensor); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(dev_place); + detail::DeserializeFromMessage(v.second, dev_ctx, var); + } + if (exit_flag) { + break; } rpc_service_->Reset(); @@ -121,7 +135,7 @@ class RecvOp : public framework::OperatorBase { framework::proto::ProgramDesc program_desc; program_desc.ParseFromString(program_str); framework::ProgramDesc program(program_desc); - framework::Executor executor(dev_ctx); + framework::Executor executor(dev_place); // Run sub graph to get optimized tensor try { executor.Run(program, &recv_scope, 0, /*global_block*/ diff --git a/paddle/operators/reduce_op.cc b/paddle/operators/reduce_op.cc index 19220f2f59d218e9b4d57b260b35df64b4abd2cb..a3ff4a6ca0ef30be42e7801386a3561930638a8a 100644 --- a/paddle/operators/reduce_op.cc +++ b/paddle/operators/reduce_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/reduce_op.h" #include "paddle/operators/net_op.h" diff --git a/paddle/operators/reduce_op.cu b/paddle/operators/reduce_op.cu index a10ace5253b850db5855bef8384278edebc9e45f..1dd948ed8a79cce8468f2fe210b5636e7dd1f99e 100644 --- a/paddle/operators/reduce_op.cu +++ b/paddle/operators/reduce_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/reduce_op.h" diff --git a/paddle/operators/reduce_op.h b/paddle/operators/reduce_op.h index 7bd99cb1e6d532963ef648202f460f363baad9b5..da5f3977769990a45c94db21f5dbd01ac70ac06e 100644 --- a/paddle/operators/reduce_op.h +++ b/paddle/operators/reduce_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/reorder_lod_tensor_by_rank_op.cc b/paddle/operators/reorder_lod_tensor_by_rank_op.cc index 5e3079ee0c91c337dca1e57729438fb9be4a0ff4..8d652ff806461cea3d0e8d3bd70704b4b6bc2173 100644 --- a/paddle/operators/reorder_lod_tensor_by_rank_op.cc +++ b/paddle/operators/reorder_lod_tensor_by_rank_op.cc @@ -1,20 +1,21 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ -#include +#include "paddle/framework/lod_rank_table.h" #include "paddle/framework/op_registry.h" #include "paddle/operators/detail/safe_ref.h" +#include "paddle/platform/device_context.h" namespace paddle { namespace operators { @@ -53,7 +54,7 @@ class ReorderLoDTensorByRankTableBase : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { auto &x = detail::Ref(scope.FindVar(Input("X")), "Cannot find input lod tensor variable %s", Input("X")) @@ -69,11 +70,11 @@ class ReorderLoDTensorByRankTableBase : public framework::OperatorBase { out.Resize(x.dims()); out.mutable_data(x.place(), x.type()); - this->process(dev_ctx, x, rank_table, &out); + this->process(place, x, rank_table, &out); } protected: - virtual void process(const platform::DeviceContext &dev_ctx, + virtual void process(const platform::Place &place, const framework::LoDTensor &x, const framework::LoDRankTable &rank_table, framework::LoDTensor *out) const = 0; @@ -104,7 +105,7 @@ class ReorderLoDTensorByRankTableBase : public framework::OperatorBase { return absolute_table; } - size_t CopyTensorAndLod(const platform::DeviceContext &dev_ctx, + size_t CopyTensorAndLod(const platform::Place &place, const AbsoluteRankTableItem &item, const framework::LoDTensor &x, framework::LoDTensor *out, size_t out_offset) const { @@ -130,6 +131,8 @@ class ReorderLoDTensorByRankTableBase : public framework::OperatorBase { auto x_sliced = x.Slice(x_offset, x_offset + len); auto out_sliced = out->Slice(out_offset, out_offset + len); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); framework::CopyFrom(x_sliced, out_sliced.place(), dev_ctx, &out_sliced); out_offset += len; return out_offset; @@ -145,8 +148,7 @@ class ReorderLoDTensorByRankTableOp : public ReorderLoDTensorByRankTableBase { : ReorderLoDTensorByRankTableBase(type, inputs, outputs, attrs) {} protected: - void process(const platform::DeviceContext &dev_ctx, - const framework::LoDTensor &x, + void process(const platform::Place &place, const framework::LoDTensor &x, const framework::LoDRankTable &rank_table, framework::LoDTensor *out) const override { auto absolute_table = GetAbsoluteOffsetAndLengthByLoDRankTable(x); @@ -154,7 +156,7 @@ class ReorderLoDTensorByRankTableOp : public ReorderLoDTensorByRankTableBase { out->mutable_lod()->clear(); for (auto &item : rank_table.items()) { PADDLE_ENFORCE_LT(item.index, absolute_table.size()); - out_offset = CopyTensorAndLod(dev_ctx, absolute_table[item.index], x, out, + out_offset = CopyTensorAndLod(place, absolute_table[item.index], x, out, out_offset); } } @@ -192,8 +194,7 @@ class ReorderLoDTensorByRankGradOp : public ReorderLoDTensorByRankTableBase { : ReorderLoDTensorByRankTableBase(type, inputs, outputs, attrs) {} protected: - void process(const platform::DeviceContext &dev_ctx, - const framework::LoDTensor &x, + void process(const platform::Place &place, const framework::LoDTensor &x, const framework::LoDRankTable &rank_table, framework::LoDTensor *out) const override { auto absolute_table = GetAbsoluteOffsetAndLengthByLoDRankTable(x); @@ -214,7 +215,7 @@ class ReorderLoDTensorByRankGradOp : public ReorderLoDTensorByRankTableBase { // Copy TensorAndLod size_t out_offset = 0; for (auto &offset : offsets) { - out_offset = this->CopyTensorAndLod(dev_ctx, absolute_table[offset.first], + out_offset = this->CopyTensorAndLod(place, absolute_table[offset.first], x, out, out_offset); } } diff --git a/paddle/operators/reshape_op.cc b/paddle/operators/reshape_op.cc index 2c5167295d8546358b09e018ee02f0941f2897d1..58e8fd6124d8c076337ae9bb2f5103e7a3cb7ff0 100644 --- a/paddle/operators/reshape_op.cc +++ b/paddle/operators/reshape_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/reshape_op.h" diff --git a/paddle/operators/reshape_op.cu b/paddle/operators/reshape_op.cu index b7329238c0ea8ebb374d35bd7cddced3dfee1a2c..f487e43b99d5be2af299a9edd91dcda0c4eb7b99 100644 --- a/paddle/operators/reshape_op.cu +++ b/paddle/operators/reshape_op.cu @@ -1,22 +1,22 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/reshape_op.h" REGISTER_OP_CUDA_KERNEL( reshape, - paddle::operators::ReshapeKernel); + paddle::operators::ReshapeKernel); REGISTER_OP_CUDA_KERNEL( reshape_grad, - paddle::operators::ReshapeGradKernel); + paddle::operators::ReshapeGradKernel); diff --git a/paddle/operators/reshape_op.h b/paddle/operators/reshape_op.h index 92d8cbbb56e224fe67e630bdfcb16d7df44f2846..a4eb34a0ad1230b6257cd299c8ed563acb054367 100644 --- a/paddle/operators/reshape_op.h +++ b/paddle/operators/reshape_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/rmsprop_op.cu b/paddle/operators/rmsprop_op.cu index 2a9fd6e1044e923b9ccffab834ff64df0f7cf5d7..0295dc262f095a2b58ab34e3c2ec9f5440e4bfca 100644 --- a/paddle/operators/rmsprop_op.cu +++ b/paddle/operators/rmsprop_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/rmsprop_op.h" diff --git a/paddle/operators/rnn_memory_helper_op.cc b/paddle/operators/rnn_memory_helper_op.cc index 795bdf3e51a2dd323e85c497fcf203ad3ed54183..eb55ed6a05b51d7a6c63d16fcf5aff73f6744903 100644 --- a/paddle/operators/rnn_memory_helper_op.cc +++ b/paddle/operators/rnn_memory_helper_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" @@ -25,7 +25,7 @@ class RNNMemoryHelperOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &dev_place) const override { auto mem_var_name = Input("X"); auto *mem_var = scope.FindVar(mem_var_name); PADDLE_ENFORCE(mem_var != nullptr, @@ -77,7 +77,7 @@ class RNNMemoryHelperGradOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &dev_place) const override { auto out_grad_var_name = Input(framework::GradVarName("Out")); auto *out_grad_var = scope.FindVar(out_grad_var_name); @@ -100,7 +100,7 @@ class RNNMemoryHelperGradOp : public framework::OperatorBase { auto zero_op = framework::OpRegistry::CreateOp( "fill_constant", {}, {{"Out", {in_grad_var_name}}}, attrs); - zero_op->Run(scope, dev_ctx); + zero_op->Run(scope, dev_place); } else { auto &out_grad_tensor = out_grad_var->Get(); auto *in_grad_tensor = in_grad_var->GetMutable(); diff --git a/paddle/operators/roi_pool_op.cc b/paddle/operators/roi_pool_op.cc index 85b6a8e15160d0c259a270f5e12ca9e67a6508ab..ef1804d9762200686ac8537140af046c21443779 100644 --- a/paddle/operators/roi_pool_op.cc +++ b/paddle/operators/roi_pool_op.cc @@ -68,7 +68,7 @@ class ROIPoolOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), @@ -89,7 +89,7 @@ class ROIPoolGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), diff --git a/paddle/operators/row_conv_op.cc b/paddle/operators/row_conv_op.cc index 6b116a9fe704e6ddf18c22455c06346ea14909d2..68f4e3531566fd346055404d45651c2b53ebe31b 100644 --- a/paddle/operators/row_conv_op.cc +++ b/paddle/operators/row_conv_op.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/row_conv_op.h" #include "paddle/framework/eigen.h" diff --git a/paddle/operators/row_conv_op.cu b/paddle/operators/row_conv_op.cu index 56a98ff299e8263179306756631949761e386f70..41f2c5b9de91ade15b4010f56377675cfd1b611c 100644 --- a/paddle/operators/row_conv_op.cu +++ b/paddle/operators/row_conv_op.cu @@ -1,16 +1,16 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/math/math_function.h" #include "paddle/operators/row_conv_op.h" diff --git a/paddle/operators/row_conv_op.h b/paddle/operators/row_conv_op.h index 80912ad8f73b3581efa9e263427e99304208d581..10d435ab080851713ee08a491c43aad1549f6fbb 100644 --- a/paddle/operators/row_conv_op.h +++ b/paddle/operators/row_conv_op.h @@ -1,16 +1,16 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/op_registry.h" diff --git a/paddle/operators/save_load_op_test.cc b/paddle/operators/save_load_op_test.cc index a57466a48d4d6016fe2618d19fdca4c4f667124a..40103d864fb58804b39ca5f3c63e802a430ce886 100644 --- a/paddle/operators/save_load_op_test.cc +++ b/paddle/operators/save_load_op_test.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "gtest/gtest.h" #include "paddle/framework/op_registry.h" @@ -21,7 +21,7 @@ USE_NO_KERNEL_OP(load); TEST(SaveLoadOp, CPU) { paddle::framework::Scope scope; paddle::platform::CPUPlace place; - paddle::platform::CPUDeviceContext ctx(place); + auto var = scope.Var("test_var"); auto tensor = var->GetMutable(); tensor->Resize({10, 10}); @@ -42,13 +42,13 @@ TEST(SaveLoadOp, CPU) { auto save_op = paddle::framework::OpRegistry::CreateOp( "save", {{"X", {"test_var"}}}, {}, attrs); - save_op->Run(scope, ctx); + save_op->Run(scope, place); auto load_var = scope.Var("out_var"); auto target = load_var->GetMutable(); auto load_op = paddle::framework::OpRegistry::CreateOp( "load", {}, {{"Out", {"out_var"}}}, attrs); - load_op->Run(scope, ctx); + load_op->Run(scope, place); int* actual = target->data(); for (int64_t i = 0; i < tensor->numel(); ++i) { EXPECT_EQ(expect[i], actual[i]); diff --git a/paddle/operators/save_op.cc b/paddle/operators/save_op.cc index eae1146d6c61fe56ebc48ac50e1eacd62e3fa7d0..4b1cbe88836e340c94f797806243a6768410ed3d 100644 --- a/paddle/operators/save_op.cc +++ b/paddle/operators/save_op.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include #include @@ -21,6 +21,7 @@ #include "paddle/framework/framework.pb.h" #include "paddle/framework/lod_tensor.h" #include "paddle/framework/op_registry.h" +#include "paddle/platform/device_context.h" namespace paddle { namespace operators { @@ -62,7 +63,7 @@ class SaveOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { auto filename = Attr("file_path"); auto overwrite = Attr("overwrite"); @@ -88,6 +89,11 @@ class SaveOp : public framework::OperatorBase { "SaveOp only support LoDTensor, %s has wrong type", iname); auto &tensor = var->Get(); + + // get device context from pool + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + framework::SerializeToStream(fout, tensor, dev_ctx); } }; diff --git a/paddle/operators/scale_op.cc b/paddle/operators/scale_op.cc index ee39888713544703ee8d305b2c04e4b03deeceab..f634ebe9a2a4648bd08f00af635ef22e8d86a8de 100644 --- a/paddle/operators/scale_op.cc +++ b/paddle/operators/scale_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/scale_op.h" #include "paddle/operators/net_op.h" diff --git a/paddle/operators/scale_op.cu b/paddle/operators/scale_op.cu index 0c7980430f31e2720c7af97aa14cf146c7dfc009..7202c0de707ff0b0b3ad966d9d1d3a7c0a89e880 100644 --- a/paddle/operators/scale_op.cu +++ b/paddle/operators/scale_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/scale_op.h" diff --git a/paddle/operators/scale_op.h b/paddle/operators/scale_op.h index 02a8c97a83f5b6f95bbd4079c453dfdc7b7c1481..395268c2eee40c187f5d211317ca8b28d35a71e0 100644 --- a/paddle/operators/scale_op.h +++ b/paddle/operators/scale_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/scatter.cu.h b/paddle/operators/scatter.cu.h index d95436be4f25b9df4aaef57ddb249ecf944f0666..55555300fc3219c0651583d8540b47189c8d3f13 100644 --- a/paddle/operators/scatter.cu.h +++ b/paddle/operators/scatter.cu.h @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/tensor.h" diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc index 173c9582557eb4e020824d5830731e3e2312dc3c..806dccc6ca78bf64da828fe13e08e043097bd939 100644 --- a/paddle/operators/scatter_op.cc +++ b/paddle/operators/scatter_op.cc @@ -49,7 +49,7 @@ class ScatterOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Ref")->type()), @@ -68,7 +68,7 @@ class ScatterGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Ref")->type()), diff --git a/paddle/operators/scatter_op.cu b/paddle/operators/scatter_op.cu index 6b43a1389f98bf268cb3b70d7e61409f361e0063..0c198d225890882ab6697d3a8b3d17e034c06cc4 100644 --- a/paddle/operators/scatter_op.cu +++ b/paddle/operators/scatter_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "gather.cu.h" #include "paddle/operators/gather_op.h" diff --git a/paddle/operators/send_op.cc b/paddle/operators/send_op.cc index a5681910708bae584bdf4217b61ca63a3988e1ff..95c207221a7b34732eca4cfd07fed0a8f1671981 100644 --- a/paddle/operators/send_op.cc +++ b/paddle/operators/send_op.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include @@ -41,9 +41,11 @@ class SendOp : public framework::OperatorBase { grpc::CreateChannel(ep, grpc::InsecureChannelCredentials()))); } } + void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &dev_place) const override { auto ins = Inputs("X"); + auto outs = Outputs("Out"); std::vector epmap = Attr>("epmap"); // TODO(typhoonzero): use async calls to send multiple variable asyncly. for (size_t i = 0; i < ins.size(); ++i) { @@ -54,10 +56,10 @@ class SendOp : public framework::OperatorBase { } // TODO(typhoonzero): support async optimization client_map_[epmap[0]]->Wait(); - for (size_t i = 0; i < ins.size(); ++i) { - bool ret = client_map_[epmap[i]]->GetVariable(scope, ins[i]); + for (size_t i = 0; i < outs.size(); ++i) { + bool ret = client_map_[epmap[i]]->GetVariable(scope, outs[i]); if (!ret) { - LOG(ERROR) << "GetVariable error: " << ins[i]; + LOG(ERROR) << "GetVariable error: " << outs[i]; } } } @@ -72,18 +74,22 @@ class SendOpMaker : public framework::OpProtoAndCheckerMaker { SendOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "(Tensor) Input tensor to be send").AsDuplicable(); + AddOutput("Out", "(Tensor) Output tensor to get from server") + .AsDuplicable(); AddComment(R"DOC( Recv operator -This operator will recv tensor from send_op +This operator will send tensor to recv_op. )DOC"); AddAttr>("endpoints", "(string vector, default 127.0.0.1:6164)" - "Server endpoints to send variables to."); + "Server endpoints to send variables to.") + .SetDefault({}); AddAttr>("epmap", "(string vector, default 127.0.0.1:6164)" "Server endpoints in the order of input " - "variables for mapping"); + "variables for mapping") + .SetDefault({}); } }; diff --git a/paddle/operators/send_recv_op_test.cc b/paddle/operators/send_recv_op_test.cc index d899d8154cce57a88de0e1d19e3393528ce367e2..fa94424bf9e8e719ec0822268685b0806a109d21 100644 --- a/paddle/operators/send_recv_op_test.cc +++ b/paddle/operators/send_recv_op_test.cc @@ -1,19 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -// TODO(typhoonzero): add python bindings for this test as -// a RemoteOptimizer. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include #include @@ -23,22 +20,27 @@ #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" #include "paddle/framework/program_desc.h" +#include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/selected_rows_functor.h" #include "paddle/string/printf.h" USE_NO_KERNEL_OP(send); USE_NO_KERNEL_OP(recv); USE_OP(sum); +namespace f = paddle::framework; +namespace p = paddle::platform; +namespace m = paddle::operators::math; + // global for simplicity. -std::unique_ptr recv_op; +std::unique_ptr recv_op; -void InitTensorsInScope(paddle::framework::Scope &scope, - paddle::platform::CPUPlace &place) { - paddle::platform::CPUDeviceContext ctx(place); +void InitTensorsInScope(f::Scope &scope, p::CPUPlace &place) { + p::CPUDeviceContext ctx(place); for (int i = 0; i < 2; ++i) { auto var_name = paddle::string::Sprintf("x%d", i); auto var = scope.Var(var_name); - auto tensor = var->GetMutable(); + auto tensor = var->GetMutable(); tensor->Resize({10, 10}); float *expect = tensor->mutable_data(place); for (int64_t i = 0; i < tensor->numel(); ++i) { @@ -47,21 +49,53 @@ void InitTensorsInScope(paddle::framework::Scope &scope, } auto out_var = scope.Var("Out"); - auto out_tensor = out_var->GetMutable(); + auto out_tensor = out_var->GetMutable(); out_tensor->Resize({10, 10}); out_tensor->mutable_data(place); // allocate } -void AddOp(const std::string &type, - const paddle::framework::VariableNameMap &inputs, - const paddle::framework::VariableNameMap &outputs, - paddle::framework::AttributeMap attrs, - paddle::framework::BlockDesc *block) { +void InitSelectedRowsInScope(f::Scope &scope, p::CPUPlace &place) { + p::CPUDeviceContext ctx(place); + int64_t height = 10; + int64_t row_numel = 10; + m::SetConstant set_one; + // init x0 + std::vector rows0{0, 4, 7}; + auto x0_var = scope.Var("x0"); + auto x0 = x0_var->GetMutable(); + x0->set_rows(rows0); + x0->set_height(height); + auto x0_value = x0->mutable_value(); + x0_value->mutable_data( + f::make_ddim({static_cast(rows0.size()), row_numel}), place); + set_one(ctx, x0_value, 1.0); + + // init x1 + std::vector rows1{2, 9}; + auto x1_var = scope.Var("x1"); + auto x1 = x1_var->GetMutable(); + x1->set_rows(rows1); + x1->set_height(height); + auto x1_value = x1->mutable_value(); + x1_value->mutable_data( + f::make_ddim({static_cast(rows1.size()), row_numel}), place); + set_one(ctx, x1_value, 1.0); + + auto out_var = scope.Var("Out"); + auto out = out_var->GetMutable(); + auto out_value = out->mutable_value(); + out->set_height(height); + out_value->mutable_data(f::make_ddim({5, 10}), place); +} + +void AddOp(const std::string &type, const f::VariableNameMap &inputs, + const f::VariableNameMap &outputs, f::AttributeMap attrs, + f::BlockDesc *block) { // insert output for (auto kv : outputs) { for (auto v : kv.second) { auto var = block->Var(v); - var->SetDataType(paddle::framework::proto::DataType::FP32); + var->SetDataType(f::proto::DataType::FP32); } } @@ -77,57 +111,99 @@ void AddOp(const std::string &type, op->SetAttrMap(attrs); } -void StartServerNet() { - paddle::framework::Scope scope; - paddle::platform::CPUPlace place; - InitTensorsInScope(scope, place); +void StartServerNet(bool is_sparse) { + f::Scope scope; + p::CPUPlace place; + if (is_sparse) { + InitSelectedRowsInScope(scope, place); + } else { + InitTensorsInScope(scope, place); + } // sub program run in recv_op, for simple test we use sum - paddle::framework::ProgramDesc program; - paddle::framework::BlockDesc *block = program.MutableBlock(0); + f::ProgramDesc program; + f::BlockDesc *block = program.MutableBlock(0); // X for server side tensors, RX for received tensers, must be of same shape. AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, block); - paddle::framework::AttributeMap attrs; + f::AttributeMap attrs; attrs.insert({"endpoint", std::string("127.0.0.1:6174")}); + attrs.insert({"ParamList", std::vector({"Out"})}); + attrs.insert({"GradList", std::vector({"x1"})}); std::string program_proto; PADDLE_ENFORCE(program.Proto()->SerializeToString(&program_proto)); attrs.insert({"OptimizeProgram", program_proto}); - recv_op = paddle::framework::OpRegistry::CreateOp( - "recv", {{"RX", {"x0", "x1"}}}, {{"Out", {"Out"}}}, attrs); - paddle::platform::CPUDeviceContext ctx(place); - recv_op->Run(scope, ctx); + recv_op = f::OpRegistry::CreateOp("recv", {{"RX", {"x1"}}}, {}, attrs); + recv_op->Run(scope, place); } -TEST(SendRecvOp, CPU) { - std::thread server_thread(StartServerNet); - sleep(5); // wait server to start +TEST(SendRecvOp, CPUDense) { + std::thread server_thread(StartServerNet, false); + sleep(3); // wait server to start // local net - paddle::framework::Scope scope; - paddle::platform::CPUPlace place; + f::Scope scope; + p::CPUPlace place; InitTensorsInScope(scope, place); - paddle::framework::AttributeMap attrs; - attrs.insert({"endpoint", std::string("127.0.0.1:6174")}); - - auto send_op = paddle::framework::OpRegistry::CreateOp( - "send", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, attrs); - paddle::platform::CPUDeviceContext ctx(place); - send_op->Run(scope, ctx); + f::AttributeMap attrs; + attrs.insert({"endpoints", std::vector({"127.0.0.1:6174"})}); + attrs.insert({"epmap", std::vector({"127.0.0.1:6174"})}); + auto send_op = f::OpRegistry::CreateOp("send", {{"X", {"x1"}}}, + {{"Out", {"Out"}}}, attrs); + send_op->Run(scope, place); - auto in_var = scope.Var("x0"); - auto tensor = in_var->GetMutable(); + auto in_var = scope.Var("x1"); + auto tensor = in_var->GetMutable(); float *expected = tensor->data(); - auto out_var = scope.Var("Out"); - auto target = out_var->GetMutable(); - // send fail cause output is none. + auto target = out_var->GetMutable(); + // x1 * 2 == x0 EXPECT_NE(target->memory_size(), size_t(0)); float *actual = target->data(); for (int64_t i = 0; i < target->numel(); ++i) { EXPECT_EQ(expected[i] * 2, actual[i]); } - recv_op.reset(); // dtor can shutdown and join server thread. + recv_op->Stop(); + server_thread.join(); + recv_op.reset(nullptr); +} + +TEST(SendRecvOp, CPUSparse) { + std::thread server_thread(StartServerNet, true); + sleep(3); // wait server to start + // local net + f::Scope scope; + p::CPUPlace place; + p::CPUDeviceContext ctx(place); + InitSelectedRowsInScope(scope, place); + f::AttributeMap attrs; + attrs.insert({"endpoints", std::vector({"127.0.0.1:6174"})}); + attrs.insert({"epmap", std::vector({"127.0.0.1:6174"})}); + auto send_op = f::OpRegistry::CreateOp("send", {{"X", {"x1"}}}, + {{"Out", {"Out"}}}, attrs); + send_op->Run(scope, place); + + auto x0 = scope.Var("x0")->GetMutable(); + auto x1 = scope.Var("x1")->GetMutable(); + auto out = scope.Var("Out")->GetMutable(); + auto actual = out->mutable_value(); + + std::unique_ptr expect{new f::SelectedRows()}; + auto expect_value = expect->mutable_value(); + expect_value->mutable_data(f::make_ddim({5, 10}), place); + + m::SelectedRowsAdd add_functor; + add_functor(ctx, *x0, *x1, expect.get()); + + EXPECT_EQ(actual->numel(), expect_value->numel()); + EXPECT_EQ(out->rows().size(), x0->rows().size() + x1->rows().size()); + + for (int64_t i = 0; i < expect_value->numel(); ++i) { + EXPECT_EQ(expect_value->mutable_data(place)[i], + actual->mutable_data(place)[i]); + } + recv_op->Stop(); server_thread.join(); + recv_op.reset(); } diff --git a/paddle/operators/sequence_conv_op.cu.cc b/paddle/operators/sequence_conv_op.cu.cc index eacba79ace3e60a408d5f5e21a6fe2658da56ca7..0b8f2c695564f19cf71ecc56a60e707c3703af36 100644 --- a/paddle/operators/sequence_conv_op.cu.cc +++ b/paddle/operators/sequence_conv_op.cu.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/sequence_conv_op.h" diff --git a/paddle/operators/sequence_expand_op.cc b/paddle/operators/sequence_expand_op.cc index 6227408be0529e63318bddcf6fa4f1927bb05eca..b40ec617e42110e0ab5168a8ac675adaf760fb3c 100644 --- a/paddle/operators/sequence_expand_op.cc +++ b/paddle/operators/sequence_expand_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/sequence_expand_op.h" diff --git a/paddle/operators/sequence_expand_op.cu b/paddle/operators/sequence_expand_op.cu index f79c84dff8bf4f0e97f89d5c8bb23655abd73d46..0b9638b2ce60f73c95e1ccbcfb16cef7b5351073 100644 --- a/paddle/operators/sequence_expand_op.cu +++ b/paddle/operators/sequence_expand_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/sequence_expand_op.h" diff --git a/paddle/operators/sequence_expand_op.h b/paddle/operators/sequence_expand_op.h index 411b819c6563ec95b87881082caef5f5eb471d3b..2ba628e9c37278025e31779ab0468db46f2ff40a 100644 --- a/paddle/operators/sequence_expand_op.h +++ b/paddle/operators/sequence_expand_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/sequence_pool_op.cc b/paddle/operators/sequence_pool_op.cc index 0eb675caaddf1274a941cbfe29017cb9ea11f40f..aea98744d8fc1fc59a07250d57f76f26fb9f3634 100644 --- a/paddle/operators/sequence_pool_op.cc +++ b/paddle/operators/sequence_pool_op.cc @@ -49,7 +49,7 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker { .AsIntermediate(); AddAttr( "pooltype", - "(int, default AVERAGE) the pooling pooltype of SequencePoolOp.") + "(string, default 'AVERAGE') the pooling pooltype of SequencePoolOp.") .SetDefault("AVERAGE") .InEnum({"AVERAGE", "SUM", "SQRT", "LAST", "FIRST", "MAX"}); AddComment(R"DOC( @@ -107,7 +107,7 @@ class SequencePoolGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), diff --git a/paddle/operators/sequence_pool_op.cu b/paddle/operators/sequence_pool_op.cu index fcd65084353744dc836ff1dc5a3aa4b03a205130..265f695935236236f98c2dd2062072756e9c8b14 100644 --- a/paddle/operators/sequence_pool_op.cu +++ b/paddle/operators/sequence_pool_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU diff --git a/paddle/operators/sequence_slice_op.cc b/paddle/operators/sequence_slice_op.cc index 309ee1f3a82c35104db74084c4ef761bd4b06695..98bd8854903e5abf6d27432a2af0aaae980c0b1d 100644 --- a/paddle/operators/sequence_slice_op.cc +++ b/paddle/operators/sequence_slice_op.cc @@ -48,7 +48,7 @@ class SequenceSliceOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), @@ -69,7 +69,7 @@ class SequenceSliceGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc index fb4b43e472f86f2fa30a7013731c4621cb2b3e0e..a11c9624ce5e8485449dd6b420ad1f23ff3550c7 100644 --- a/paddle/operators/sgd_op.cc +++ b/paddle/operators/sgd_op.cc @@ -61,43 +61,9 @@ $$param\_out = param - learning\_rate * grad$$ } }; -template -struct SparseSGDFunctor { - void operator()(const platform::CPUDeviceContext& context, - const framework::SelectedRows& input, - const framework::Tensor& learning_rate, - framework::Tensor* output) { - auto in_height = input.height(); - auto out_dims = output->dims(); - PADDLE_ENFORCE_EQ(in_height, out_dims[0]); - - auto& in_value = input.value(); - auto& in_rows = input.rows(); - - int64_t in_row_numel = in_value.numel() / in_rows.size(); - PADDLE_ENFORCE_EQ(in_row_numel, output->numel() / in_height); - - auto* in_data = in_value.data(); - auto* out_data = output->data(); - auto* lr = learning_rate.data(); - - for (size_t i = 0; i < in_rows.size(); i++) { - for (int64_t j = 0; j < in_row_numel; j++) { - out_data[in_rows[i] * in_row_numel + j] -= - lr[0] * in_data[i * in_row_numel + j]; - } - } - } -}; - -template struct SparseSGDFunctor; -template struct SparseSGDFunctor; - } // namespace operators } // namespace paddle namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(sgd, ops::SGDOp, ops::SGDOpMaker); -REGISTER_OP_CPU_KERNEL( - sgd, ops::SGDOpKernel, - ops::SGDOpKernel); +REGISTER_OP_CPU_KERNEL(sgd, ops::SGDOpKernel, ops::SGDOpKernel); diff --git a/paddle/operators/sgd_op.cu b/paddle/operators/sgd_op.cu index a3c0db7e50ecaabd6d4b83c43e5436e6be491676..42f8f8b2f072f9d204dfadcd732926b5c98dc617 100644 --- a/paddle/operators/sgd_op.cu +++ b/paddle/operators/sgd_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/sgd_op.h" @@ -20,6 +20,19 @@ namespace paddle { namespace operators { namespace { + +template +__global__ void SGDKernel(const T* g, const T* p, const T* learning_rate, + const int num, T* p_out) { + T lr = learning_rate[0]; + int grid_size = blockDim.x * gridDim.x; + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < num; i += grid_size) { + T g_data = g[i]; + T p_data = p[i]; + p_out[i] = p_data - lr * g_data; + } +} + template __global__ void SparseSGDFunctorKernel(const T* selected_rows, const int64_t* rows, @@ -41,40 +54,65 @@ __global__ void SparseSGDFunctorKernel(const T* selected_rows, } // namespace template -struct SparseSGDFunctor { - void operator()(const platform::CUDADeviceContext& context, - const framework::SelectedRows& input, - const framework::Tensor& learning_rate, - framework::Tensor* output) { - auto in_height = input.height(); - auto out_dims = output->dims(); - PADDLE_ENFORCE_EQ(in_height, out_dims[0]); - - auto& in_value = input.value(); - auto& in_rows = input.rows(); - - int64_t in_row_numel = in_value.numel() / in_rows.size(); - PADDLE_ENFORCE_EQ(in_row_numel, output->numel() / in_height); - - auto* in_data = in_value.data(); - auto* out_data = output->data(); - - const int block_size = 256; - dim3 threads(block_size, 1); - dim3 grid(1, in_rows.size()); - SparseSGDFunctorKernel<<>>( - in_data, in_rows.data(), learning_rate.data(), out_data, - in_row_numel); +class SGDOpCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* param = ctx.Input("Param"); + auto* param_out = ctx.Output("ParamOut"); + auto* learning_rate = ctx.Input("LearningRate"); + + auto* grad_var = ctx.InputVar("Grad"); + // Actually, all tensors are LoDTensor except SelectedRows. + if (grad_var->IsType()) { + param_out->mutable_data(ctx.GetPlace()); + auto* grad = ctx.Input("Grad"); + auto* grad_data = grad->data(); + auto* param_data = param->data(); + auto* param_out_data = param_out->data(); + + int block = 512; + int grid = (param->numel() + block - 1) / block; + + SGDKernel<<>>( + grad_data, param_data, learning_rate->data(), param->numel(), + param_out_data); + + } else if (grad_var->IsType()) { + // TODO(qijun): In Sparse SGD operator, in-place update is enforced. + // This manual optimization brings difficulty to track data dependency. + // It's better to find a more elegant solution. + PADDLE_ENFORCE_EQ(param, param_out); + auto* grad = ctx.Input("Grad"); + + auto in_height = grad->height(); + auto out_dims = param_out->dims(); + PADDLE_ENFORCE_EQ(in_height, out_dims[0]); + + auto& in_value = grad->value(); + auto& in_rows = grad->rows(); + + int64_t in_row_numel = in_value.numel() / in_rows.size(); + PADDLE_ENFORCE_EQ(in_row_numel, param_out->numel() / in_height); + + auto* in_data = in_value.data(); + auto* out_data = param_out->data(); + + const int block_size = 256; + dim3 threads(block_size, 1); + dim3 grid(1, in_rows.size()); + SparseSGDFunctorKernel< + T, 256><<>>( + in_data, in_rows.data(), learning_rate->data(), out_data, + in_row_numel); + + } else { + PADDLE_THROW("Unsupported Variable Type of Grad"); + } } }; - -template struct SparseSGDFunctor; -template struct SparseSGDFunctor; - } // namespace operators } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL( - sgd, ops::SGDOpKernel, - ops::SGDOpKernel); +REGISTER_OP_CUDA_KERNEL(sgd, ops::SGDOpCUDAKernel, + ops::SGDOpCUDAKernel); diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h index c920025a91cd0b68019bcb05558398093f31e206..a6c544591e1172320f6cf7192bf640ff25225b99 100644 --- a/paddle/operators/sgd_op.h +++ b/paddle/operators/sgd_op.h @@ -20,15 +20,7 @@ limitations under the License. */ namespace paddle { namespace operators { -template -struct SparseSGDFunctor { - void operator()(const DeviceContext& context, - const framework::SelectedRows& input, - const framework::Tensor& learning_rate, - framework::Tensor* output); -}; - -template +template class SGDOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -45,21 +37,36 @@ class SGDOpKernel : public framework::OpKernel { auto p = framework::EigenVector::Flatten(*param); auto g = framework::EigenVector::Flatten(*grad); auto o = framework::EigenVector::Flatten(*param_out); - auto lr = framework::EigenVector::Flatten(*learning_rate); - auto& place = - *ctx.template device_context().eigen_device(); + auto* lr = learning_rate->data(); - Eigen::DSizes grad_dsize(grad->numel()); - o.device(place) = p - lr.broadcast(grad_dsize) * g; + o = p - lr[0] * g; } else if (grad_var->IsType()) { // TODO(qijun): In Sparse SGD operator, in-place update is enforced. // This manual optimization brings difficulty to track data dependency. // It's better to find a more elegant solution. PADDLE_ENFORCE_EQ(param, param_out); auto* grad = ctx.Input("Grad"); - SparseSGDFunctor functor; - functor(ctx.template device_context(), *grad, - *learning_rate, param_out); + + auto in_height = grad->height(); + auto out_dims = param_out->dims(); + PADDLE_ENFORCE_EQ(in_height, out_dims[0]); + + auto& in_value = grad->value(); + auto& in_rows = grad->rows(); + + int64_t in_row_numel = in_value.numel() / in_rows.size(); + PADDLE_ENFORCE_EQ(in_row_numel, param_out->numel() / in_height); + + auto* in_data = in_value.data(); + auto* out_data = param_out->data(); + auto* lr = learning_rate->data(); + + for (size_t i = 0; i < in_rows.size(); i++) { + for (int64_t j = 0; j < in_row_numel; j++) { + out_data[in_rows[i] * in_row_numel + j] -= + lr[0] * in_data[i * in_row_numel + j]; + } + } } else { PADDLE_THROW("Unsupported Variable Type of Grad"); } diff --git a/paddle/operators/shrink_rnn_memory_op.cc b/paddle/operators/shrink_rnn_memory_op.cc index 48194a547bbea5ddda7c5f3e2421431d1d81042d..b37269b471b4d71b42c41641fd14c7a64d2719d6 100644 --- a/paddle/operators/shrink_rnn_memory_op.cc +++ b/paddle/operators/shrink_rnn_memory_op.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/lod_rank_table.h" #include "paddle/operators/array_operator.h" #include "paddle/operators/math/math_function.h" @@ -27,11 +27,11 @@ class ShrinkRNNMemoryOp : public ArrayOp { : ArrayOp(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { auto *x_var = scope.FindVar(Input("X")); PADDLE_ENFORCE(x_var != nullptr, "Input X must be set"); auto &x_tensor = x_var->Get(); - size_t offset = this->GetOffset(scope, dev_ctx); + size_t offset = this->GetOffset(scope, place); auto *rank_table_var = scope.FindVar(Input("RankTable")); PADDLE_ENFORCE(rank_table_var != nullptr, "RankTable must be set"); auto &rank_table = rank_table_var->Get(); @@ -93,7 +93,7 @@ class ShrinkRNNMemoryGradOp : public ArrayOp { : ArrayOp(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { auto *dout_var = scope.FindVar(Input(framework::GradVarName("Out"))); auto *dx_var = scope.FindVar(Output(framework::GradVarName("X"))); PADDLE_ENFORCE(dx_var != nullptr, "Input Gradient should not be nullptr"); @@ -105,6 +105,10 @@ class ShrinkRNNMemoryGradOp : public ArrayOp { dx_tensor.Resize(x_tensor.dims()); dx_tensor.mutable_data(x_tensor.place(), x_tensor.type()); + // get device context from pool + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + if (dout_var == nullptr) { // dx_tensor fill zero math::set_constant(dev_ctx, &dx_tensor, 0.0f); } else { @@ -112,9 +116,9 @@ class ShrinkRNNMemoryGradOp : public ArrayOp { auto height = dout_tensor.dims()[0]; auto slice = dx_tensor.Slice(0, static_cast(height)); framework::CopyFrom(dout_tensor, dout_tensor.place(), dev_ctx, &slice); - if (dx_tensor.dims()[0] < height) { + if (dx_tensor.dims()[0] > height) { auto rest_tensor = dx_tensor.Slice( - static_cast(height), static_cast(dout_tensor.dims()[0])); + static_cast(height), static_cast(dx_tensor.dims()[0])); math::set_constant(dev_ctx, &rest_tensor, 0.0f); } } diff --git a/paddle/operators/sigmoid_cross_entropy_with_logits_op.cc b/paddle/operators/sigmoid_cross_entropy_with_logits_op.cc index 9b5227d92d1cfd7d6ac7e28186fbba6d887475b1..c526a88a127da12a6384777bca31b60873844d94 100644 --- a/paddle/operators/sigmoid_cross_entropy_with_logits_op.cc +++ b/paddle/operators/sigmoid_cross_entropy_with_logits_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/sigmoid_cross_entropy_with_logits_op.h" diff --git a/paddle/operators/sigmoid_cross_entropy_with_logits_op.cu b/paddle/operators/sigmoid_cross_entropy_with_logits_op.cu index 1b569c93ed9568a26824defef0d25bb1c3dadad4..3f393265f48b428dca8703ff77688de979fb63df 100644 --- a/paddle/operators/sigmoid_cross_entropy_with_logits_op.cu +++ b/paddle/operators/sigmoid_cross_entropy_with_logits_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/sigmoid_cross_entropy_with_logits_op.h" diff --git a/paddle/operators/sigmoid_cross_entropy_with_logits_op.h b/paddle/operators/sigmoid_cross_entropy_with_logits_op.h index 8fe7c5ba8224f8dac5de8d7ee772ebc71f987d69..b78bcc436e9fa5c5d4db3fbb22224e328c3bc3c2 100644 --- a/paddle/operators/sigmoid_cross_entropy_with_logits_op.h +++ b/paddle/operators/sigmoid_cross_entropy_with_logits_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" diff --git a/paddle/operators/sign_op.cc b/paddle/operators/sign_op.cc index b2459fb2f53939b3131af1540044ce361b87d08a..f63eaa4464cc668acdb8e5b8a74ad5bba936db44 100644 --- a/paddle/operators/sign_op.cc +++ b/paddle/operators/sign_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/sign_op.h" diff --git a/paddle/operators/sign_op.cu b/paddle/operators/sign_op.cu index 9bc1c65d214ba8f988dec3b7b11da9e1ec3a6581..f224880cffb2154a7c46b8a4701d7357e67bb70c 100644 --- a/paddle/operators/sign_op.cu +++ b/paddle/operators/sign_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/sign_op.h" diff --git a/paddle/operators/sign_op.h b/paddle/operators/sign_op.h index 2e476ed6658491b3dcec3cf1388ccc4a0813449c..9fe49ae1a2161d9f1472eef830c11b0f8305c568 100644 --- a/paddle/operators/sign_op.h +++ b/paddle/operators/sign_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/smooth_l1_loss_op.cc b/paddle/operators/smooth_l1_loss_op.cc index 42a53cfa06f7724000ff59c69504629890f6ec87..dcb18d729da69beaa556e4b93129dafb08b72c06 100644 --- a/paddle/operators/smooth_l1_loss_op.cc +++ b/paddle/operators/smooth_l1_loss_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/smooth_l1_loss_op.h" diff --git a/paddle/operators/smooth_l1_loss_op.cu b/paddle/operators/smooth_l1_loss_op.cu index 8e94ebac644d1047920827250c4313c657b22ea0..213429bc370ef0d5b493b3a448df1b3bf0e4e87c 100644 --- a/paddle/operators/smooth_l1_loss_op.cu +++ b/paddle/operators/smooth_l1_loss_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU diff --git a/paddle/operators/smooth_l1_loss_op.h b/paddle/operators/smooth_l1_loss_op.h index 1a70c9c63c340d66b6bf0db97cc8ab35a663f816..3facfae116d711f86ea5c193562c20ea60a2efc9 100644 --- a/paddle/operators/smooth_l1_loss_op.h +++ b/paddle/operators/smooth_l1_loss_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 6b3f19bb46c45b7dd8ec6c23ee449521b36d759c..e7306bc5f13377813e0bd49846bc834d501602eb 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -24,13 +24,13 @@ class SoftmaxOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of SoftmaxOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Y"), - "Output(Y) of SoftmaxOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of SoftmaxOp should not be null."); auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE(x_dims.size() == 2UL, "The input of softmax op must be a matrix."); - ctx->SetOutputDim("Y", x_dims); + ctx->SetOutputDim("Out", x_dims); } }; @@ -41,7 +41,7 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "The input tensor of softmax. " "2-D with shape [batch_size, input_feature_dimensions]."); - AddOutput("Y", "The normalized values with the same shape as X."); + AddOutput("Out", "The normalized values with the same shape as X."); AddComment(R"DOC( Softmax Operator. @@ -59,7 +59,7 @@ exponential values of all the other dimensions is the output of the softmax operator. For each row $i$ and each column $j$ in Input(X), we have: - $$Y[i, j] = \frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])}$$ + $$Out[i, j] = \frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])}$$ )DOC"); } @@ -70,12 +70,12 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should be not null."); - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")), - "Input(Y@GRAD) should be not null."); - PADDLE_ENFORCE_EQ(ctx->GetInputDim("Y"), - ctx->GetInputDim(framework::GradVarName("Y")), - "Input(Y) and its gradients should have a same shape."); + PADDLE_ENFORCE(ctx->HasInput("Out"), "Input(Out) should be not null."); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "Input(Out@GRAD) should be not null."); + PADDLE_ENFORCE_EQ(ctx->GetInputDim("Out"), + ctx->GetInputDim(framework::GradVarName("Out")), + "Input(Out) and its gradients should have a same shape."); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } diff --git a/paddle/operators/softmax_op.cu.cc b/paddle/operators/softmax_op.cu.cc index 7b9882cbcfe1a0381541386f76867c6bb0f1fe55..e7da40f3e82d5db858a795e9634abf57b884d6a2 100644 --- a/paddle/operators/softmax_op.cu.cc +++ b/paddle/operators/softmax_op.cu.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/softmax_op.h" diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h index 0f8998b99e93b5ed6c9b43ad7adabc2d515c1ff1..63e379a3b31a6c75aab0c56b4ce1b988fa7f0318 100644 --- a/paddle/operators/softmax_op.h +++ b/paddle/operators/softmax_op.h @@ -26,13 +26,13 @@ class SoftmaxKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); - auto* Y = context.Output("Y"); + auto* Out = context.Output("Out"); // allocate memory on device. - Y->mutable_data(context.GetPlace()); + Out->mutable_data(context.GetPlace()); math::SoftmaxFunctor()( - context.template device_context(), X, Y); + context.template device_context(), X, Out); } }; @@ -40,15 +40,15 @@ template class SoftmaxGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* Y = context.Input("Y"); - auto* dY = context.Input(framework::GradVarName("Y")); + auto* Out = context.Input("Out"); + auto* dOut = context.Input(framework::GradVarName("Out")); auto* dX = context.Output(framework::GradVarName("X")); // allocate memory on device. dX->mutable_data(context.GetPlace()); math::SoftmaxGradFunctor()( - context.template device_context(), Y, dY, dX); + context.template device_context(), Out, dOut, dX); } }; diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc index d9911a6901447d8900c3881a60c7a0852dcbf429..41e65b701e62bd2e671f3590869a5d7fed90701c 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -1,10 +1,10 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -118,7 +118,7 @@ class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Logits")->type()), @@ -159,7 +159,7 @@ class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType( diff --git a/paddle/operators/softmax_with_cross_entropy_op.cu b/paddle/operators/softmax_with_cross_entropy_op.cu index 6100c63f9aba006d9739173a8a5a2fb398187e55..61583c6161c3bbc62788dc8b6940ddcc29b2302a 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cu +++ b/paddle/operators/softmax_with_cross_entropy_op.cu @@ -1,10 +1,10 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/paddle/operators/softmax_with_cross_entropy_op.h b/paddle/operators/softmax_with_cross_entropy_op.h index 9c3431605b2f2285b2e7d71c5ff2f4a53c6c6f30..6bde0f37e06ccf7d81487e0c99227287787c5d72 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.h +++ b/paddle/operators/softmax_with_cross_entropy_op.h @@ -1,10 +1,10 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/paddle/operators/split_lod_tensor_op.cc b/paddle/operators/split_lod_tensor_op.cc index 3542d8624fec49f75314f046434cbcadf307497e..2d8787d740c70f1d4696fdec381b572ecf031f57 100644 --- a/paddle/operators/split_lod_tensor_op.cc +++ b/paddle/operators/split_lod_tensor_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/framework/op_registry.h" #include "paddle/memory/memcpy.h" +#include "paddle/platform/device_context.h" namespace paddle { namespace operators { @@ -33,7 +34,7 @@ class SplitLoDTensorOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &dev_place) const override { auto &x = scope.FindVar(Input("X"))->Get(); auto &mask = scope.FindVar(Input("Mask"))->Get(); auto *out_true = @@ -44,6 +45,9 @@ class SplitLoDTensorOp : public framework::OperatorBase { auto &x_lod = x.lod(); auto &mask_dim = mask.dims(); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(dev_place); + std::unique_ptr cpu_mask{new framework::LoDTensor()}; if (platform::is_cpu_place(mask.place())) { cpu_mask->ShareDataWith(mask); diff --git a/paddle/operators/squared_l2_distance_op.cu b/paddle/operators/squared_l2_distance_op.cu index ecc82ed1e49501b05e0cf54e5b44114db150a427..f2648dde5eb9c56aed3fad81521e6207dc2d973e 100644 --- a/paddle/operators/squared_l2_distance_op.cu +++ b/paddle/operators/squared_l2_distance_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU diff --git a/paddle/operators/squared_l2_norm_op.cc b/paddle/operators/squared_l2_norm_op.cc index 9c239042cb5127af7eebc0e534da7a7705388de8..6626bf0375548eac457f960105ce33e63e1a3706 100644 --- a/paddle/operators/squared_l2_norm_op.cc +++ b/paddle/operators/squared_l2_norm_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/squared_l2_norm_op.h" diff --git a/paddle/operators/squared_l2_norm_op.cu b/paddle/operators/squared_l2_norm_op.cu index 2d6567d090a96a43cbda203fb8176041d719e55f..b222113a8c82061bd841da440a714b66f6c1fb9c 100644 --- a/paddle/operators/squared_l2_norm_op.cu +++ b/paddle/operators/squared_l2_norm_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #define EIGEN_USE_GPU #include "paddle/operators/squared_l2_norm_op.h" diff --git a/paddle/operators/squared_l2_norm_op.h b/paddle/operators/squared_l2_norm_op.h index 0ced7e7d70ab3627a337d70890db6842ba0f7768..1ce26c775ed5700cee73d00f4c51d58a692a1152 100644 --- a/paddle/operators/squared_l2_norm_op.h +++ b/paddle/operators/squared_l2_norm_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" diff --git a/paddle/operators/strided_memcpy.h b/paddle/operators/strided_memcpy.h index c9dd80518424017d9834a2bf7aee14caa56c9d79..735cabcd973a3b5ea1ab8ab57091eae14e23b89b 100644 --- a/paddle/operators/strided_memcpy.h +++ b/paddle/operators/strided_memcpy.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/operators/detail/strided_memcpy.h" diff --git a/paddle/operators/strided_memcpy_test.cc b/paddle/operators/strided_memcpy_test.cc index 230cc1ab0bbd5ac57eb7494795e3fbcdf02c3cc8..06d81188558aad85c41c56ddefad3617d48da74c 100644 --- a/paddle/operators/strided_memcpy_test.cc +++ b/paddle/operators/strided_memcpy_test.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/strided_memcpy.h" #include "gtest/gtest.h" @@ -82,7 +82,7 @@ TEST(StridedMemcpy, GPUCrop) { }; // clang-format on - platform::GPUPlace gpu0(0); + platform::CUDAPlace gpu0(0); platform::CPUPlace cpu; platform::CUDADeviceContext ctx(gpu0); @@ -121,7 +121,7 @@ TEST(StridedMemcpy, GPUConcat) { }; // clang-format on - platform::GPUPlace gpu0(0); + platform::CUDAPlace gpu0(0); platform::CPUPlace cpu; platform::CUDADeviceContext ctx(gpu0); diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index 891839bf9cd991e15d96b86e24ea61b09e35a7c7..b86e8266425ca094a51d224fd39ce33700057f13 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -53,7 +53,7 @@ class SumOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { auto x_vars = ctx.MultiInputVar("X"); if (x_vars[0]->IsType()) { diff --git a/paddle/operators/sum_op.h b/paddle/operators/sum_op.h index eaa36aa1aea53e0b37ef6c578d8bb1cda230ded0..552b48f608b7e0248f03dbea940a83f112a67712 100644 --- a/paddle/operators/sum_op.h +++ b/paddle/operators/sum_op.h @@ -37,11 +37,11 @@ class SumKernel : public framework::OpKernel { bool in_place = out_var == in_vars[0]; if (out_var->IsType()) { - auto *out = context.Output("Out"); - out->mutable_data(context.GetPlace()); - + auto *out = context.Output("Out"); + if (!in_place) { + out->mutable_data(context.GetPlace()); + } auto result = EigenVector::Flatten(*out); - if (!in_place) { math::SetConstant constant_functor; constant_functor(context.template device_context(), out, diff --git a/paddle/operators/tensor.save b/paddle/operators/tensor.save deleted file mode 100644 index c24308a7d0131b84c28c0a9857cce4949afb2091..0000000000000000000000000000000000000000 Binary files a/paddle/operators/tensor.save and /dev/null differ diff --git a/paddle/operators/tensor_array_read_write_op.cc b/paddle/operators/tensor_array_read_write_op.cc index 90cbc19d1b1bab2e639e3d6d5b28cd13b30542f6..d5ff3e3fce29b1a888b2cd4d307c2655669e3e4c 100644 --- a/paddle/operators/tensor_array_read_write_op.cc +++ b/paddle/operators/tensor_array_read_write_op.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/array_operator.h" #include "paddle/operators/detail/safe_ref.h" namespace paddle { @@ -25,11 +25,11 @@ class WriteToArrayOp : public ArrayOp { : ArrayOp(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { auto *x = scope.FindVar(Input("X")); if (x == nullptr) return; auto &x_tensor = x->Get(); - size_t offset = GetOffset(scope, dev_ctx); + size_t offset = GetOffset(scope, place); auto *out = scope.FindVar(Output("Out"))->GetMutable(); if (offset >= out->size()) { @@ -39,7 +39,12 @@ class WriteToArrayOp : public ArrayOp { } if (x_tensor.memory_size() > 0) { auto *out_tensor = &out->at(offset); - CopyFrom(x_tensor, dev_ctx.GetPlace(), dev_ctx, out_tensor); + + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + + CopyFrom(x_tensor, place, dev_ctx, out_tensor); out_tensor->set_lod(x_tensor.lod()); } else { VLOG(10) << "WARNING: The input tensor 'x_tensor' holds no memory, so " @@ -119,17 +124,19 @@ class ReadFromArrayOp : public ArrayOp { const framework::AttributeMap &attrs) : ArrayOp(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &place) const override { auto *x = scope.FindVar(Input("X")); PADDLE_ENFORCE(x != nullptr, "X must be set"); auto &x_array = x->Get(); auto *out = scope.FindVar(Output("Out")); PADDLE_ENFORCE(out != nullptr, "Out must be set"); - auto *out_tensor = out->GetMutable(); - size_t offset = GetOffset(scope, dev_ctx); + size_t offset = GetOffset(scope, place); if (offset < x_array.size()) { - framework::CopyFrom(x_array[offset], dev_ctx.GetPlace(), dev_ctx, - out_tensor); + auto *out_tensor = out->GetMutable(); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + framework::CopyFrom(x_array[offset], place, dev_ctx, out_tensor); out_tensor->set_lod(x_array[offset].lod()); } else { VLOG(10) << "offset " << offset << " >= " << x_array.size(); diff --git a/paddle/operators/top_k_op.cu b/paddle/operators/top_k_op.cu index 453bd07267e3a6e33211117368dd9aff10a9e23f..f7bf58e7218cb9b94526cb9346bc5f9aa971038a 100644 --- a/paddle/operators/top_k_op.cu +++ b/paddle/operators/top_k_op.cu @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve. +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/op_registry.h" #include "paddle/platform/assert.h" @@ -283,7 +283,7 @@ class TopkOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use GPUPlace."); + "It must use CUDAPlace."); auto* input = ctx.Input("X"); auto* output = ctx.Output("Out"); auto* indices = ctx.Output("Indices"); diff --git a/paddle/operators/transpose_op.cc b/paddle/operators/transpose_op.cc index 0109b8bc5c30e0fe3e4ff9d741cd76b741e17926..11615d806a61b3525d2ed50f5ea5940e8d61c8f8 100644 --- a/paddle/operators/transpose_op.cc +++ b/paddle/operators/transpose_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/transpose_op.h" @@ -70,18 +70,31 @@ class TransposeOpMaker : public framework::OpProtoAndCheckerMaker { Transpose Operator. The input tensor will be permuted according to the axis values given. -The op functions similar to how numpy.transpose works in python. +The op functions is similar to how numpy.transpose works in python. + For example: - >> input = numpy.arange(6).reshape((2,3)) - >> input - array([[0, 1, 2], - [3, 4, 5]]) - >> axis = [1, 0] - >> output = input.transpose(axis) - >> output - array([[0, 3], - [1, 4], - [2, 5]]) + + .. code-block:: text + + input = numpy.arange(6).reshape((2,3)) + + the input is: + + array([[0, 1, 2], + [3, 4, 5]]) + + given axis is: + + [1, 0] + + output = input.transpose(axis) + + then the output is: + + array([[0, 3], + [1, 4], + [2, 5]]) + So, given a input tensor of shape(N, C, H, W) and the axis is {0, 2, 3, 1}, the output tensor shape will be (N, H, W, C) diff --git a/paddle/operators/transpose_op.cu.cc b/paddle/operators/transpose_op.cu.cc index 7d23f1493ec2d548438aeb2493fda8a4ff8c6e80..281c4468cc267c659befd238c9a286dd23eaf16d 100644 --- a/paddle/operators/transpose_op.cu.cc +++ b/paddle/operators/transpose_op.cu.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/operators/transpose_op.h" diff --git a/paddle/operators/transpose_op.h b/paddle/operators/transpose_op.h index d995271a6be3266e05c742ab18c34636da384e66..b9686a2db3f76dbb9b8ebdba5e243f5e5a3c571a 100644 --- a/paddle/operators/transpose_op.h +++ b/paddle/operators/transpose_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index 3c705cb3396f68f88882388675ab145660e13070..4d5dd86cb8103a76247913cc088db4cff6b6ff43 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ -#include -#include +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" @@ -63,7 +63,7 @@ class UniformRandomOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( static_cast(ctx.Attr("dtype")), diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu index cfe9d293cff2108cf25749d0e78e2e86e6e198a5..719d0872a7cba55dc97c95a73a7f86614ab3f4b7 100644 --- a/paddle/operators/uniform_random_op.cu +++ b/paddle/operators/uniform_random_op.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ -#include -#include +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include #include #include "paddle/framework/op_registry.h" diff --git a/paddle/operators/unpool_op.cc b/paddle/operators/unpool_op.cc index 7c035c0b48ebb0d7115e1499c03f8f40f2ca7282..aeed9679b2a3cce744189809c223a5b0d581ccdd 100644 --- a/paddle/operators/unpool_op.cc +++ b/paddle/operators/unpool_op.cc @@ -53,16 +53,14 @@ class Unpool2dOpMaker : public framework::OpProtoAndCheckerMaker { "(string), unpooling type, can be \"max\" for max-unpooling ") .InEnum({"max"}); AddComment(R"DOC( - "Input shape: $(N, C_{in}, H_{in}, W_{in})$ - Output shape: $(N, C_{out}, H_{out}, W_{out})$ - Where - $$ - H_{out} = (H_{in}−1) * strides[0] − 2 * paddings[0] + ksize[0] \\ - W_{out} = (W_{in}−1) * strides[1] − 2 * paddings[1] + ksize[1] - $$ - Paper: http://www.matthewzeiler.com/wp-content/uploads/2017 - /07/iccv2011.pdf - )DOC"); +Input shape is: $(N, C_{in}, H_{in}, W_{in})$, Output shape is: +$(N, C_{out}, H_{out}, W_{out})$, where +$$ +H_{out} = (H_{in}−1) * strides[0] − 2 * paddings[0] + ksize[0] \\ +W_{out} = (W_{in}−1) * strides[1] − 2 * paddings[1] + ksize[1] +$$ +Paper: http://www.matthewzeiler.com/wp-content/uploads/2017/07/iccv2011.pdf +)DOC"); } }; @@ -73,7 +71,7 @@ int OutputSize(int input_size, int ksize, int padding, int stride) { class UnpoolOp : public framework::OperatorWithKernel { protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), @@ -112,7 +110,7 @@ class UnpoolOp : public framework::OperatorWithKernel { class UnpoolOpGrad : public framework::OperatorWithKernel { protected: - framework::OpKernelType GetKernelType( + framework::OpKernelType GetActualKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), diff --git a/paddle/operators/while_op.cc b/paddle/operators/while_op.cc index 324c8b98c4811328b2a89eadc3e3420c080bd7d1..65d827e0e0c5cfc3897c1fd0b971b766201cc1e2 100644 --- a/paddle/operators/while_op.cc +++ b/paddle/operators/while_op.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include #include "paddle/framework/executor.h" @@ -25,12 +25,12 @@ namespace operators { using StepScopeVar = std::vector; using LoDTensor = framework::LoDTensor; -constexpr char kStepBlock[] = "sub_block"; -constexpr char kCondition[] = "Condition"; -constexpr char kStepScopes[] = "StepScopes"; -constexpr char kParameters[] = "X"; -constexpr char kParamGrads[] = "X@GRAD"; -constexpr char kOutputs[] = "Out"; +static constexpr char kStepBlock[] = "sub_block"; +static constexpr char kCondition[] = "Condition"; +static constexpr char kStepScopes[] = "StepScopes"; +static constexpr char kX[] = "X"; +static constexpr char kXGRAD[] = "X@GRAD"; +static constexpr char kOutputs[] = "Out"; class WhileOp : public framework::OperatorBase { public: @@ -40,13 +40,14 @@ class WhileOp : public framework::OperatorBase { : framework::OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { + const platform::Place &dev_place) const override { PADDLE_ENFORCE_NOT_NULL(scope.FindVar(Input(kCondition))); auto &cond = scope.FindVar(Input(kCondition))->Get(); PADDLE_ENFORCE_EQ(cond.dims(), paddle::framework::make_ddim({1})); - framework::Executor executor(dev_ctx); + framework::Executor executor(dev_place); auto *block = Attr(kStepBlock); + auto *program = block->Program(); auto step_scopes = @@ -66,7 +67,7 @@ class WhileOpMaker : public framework::OpProtoAndCheckerMaker { public: WhileOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput(kParameters, + AddInput(kX, "A set of variables, which are required by operators inside the " "block of While Op.") .AsDuplicable(); @@ -97,8 +98,8 @@ class WhileGradOp : public framework::OperatorBase { : framework::OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { - framework::Executor executor(dev_ctx); + const platform::Place &dev_place) const override { + framework::Executor executor(dev_place); auto *block = Attr(kStepBlock); auto *program = block->Program(); @@ -157,8 +158,8 @@ class WhileGradOp : public framework::OperatorBase { executor.Run(*program, *cur_scope_iter, block->ID(), false); - auto &pg_names = Outputs(kParamGrads); - auto &p_names = Inputs(kParameters); + auto &pg_names = Outputs(kXGRAD); + auto &p_names = Inputs(kX); PADDLE_ENFORCE_EQ(pg_names.size(), p_names.size()); for (size_t param_id = 0; param_id < pg_names.size(); ++param_id) { if (pg_names[param_id] == framework::kEmptyVarName) { @@ -189,7 +190,7 @@ class WhileGradOp : public framework::OperatorBase { auto zero_op = framework::OpRegistry::CreateOp( "fill_constant", framework::VariableNameMap{}, {{"Out", {pg_names[param_id]}}}, attrs); - zero_op->Run(scope, dev_ctx); + zero_op->Run(scope, dev_place); } } @@ -197,7 +198,7 @@ class WhileGradOp : public framework::OperatorBase { auto sum_op = framework::OpRegistry::CreateOp( "sum", {{"X", {pg_names[param_id], new_inside_name}}}, {{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); - sum_op->Run(cur_scope, dev_ctx); + sum_op->Run(cur_scope, dev_place); cur_scope.Rename(new_inside_name, inside_grad_name); } } @@ -212,11 +213,11 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker { std::unique_ptr Apply() const override { auto *grad = new framework::OpDesc(); grad->SetType("while_grad"); - grad->SetInput(kParameters, Input(kParameters)); + grad->SetInput(kX, Input(kX)); // Not all of IGs will be generated by inner gradient operators of while op. // Ignore IGs that is not generated by the inside block. - auto igs = InputGrad(kParameters, /*do not drop empty gradient*/ false); + auto igs = InputGrad(kX, /*do not drop empty gradient*/ false); std::unordered_set all_outs; for (size_t i = 0; i < grad_block_[0]->OpSize(); ++i) { for (auto &oname : grad_block_[0]->Op(i)->OutputArgumentNames()) { @@ -230,7 +231,7 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker { } } - grad->SetOutput(framework::GradVarName(kParameters), igs); + grad->SetOutput(framework::GradVarName(kX), igs); grad->SetInput(kOutputs, Output(kOutputs)); @@ -239,7 +240,7 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker { std::unordered_set block_ins; auto *fwd_block = this->grad_block_[0]->ParentBlock(); { - for (auto &p : Input(kParameters)) { + for (auto &p : Input(kX)) { block_ins.insert(p); } for (auto &o : Output(kOutputs)) { @@ -287,8 +288,8 @@ class WhileGradOpVarTypeInference : public framework::VarTypeInference { public: void operator()(const framework::OpDesc &op_desc, framework::BlockDesc *block) const override { - auto p_names = op_desc.Input(kParameters); - auto pg_names = op_desc.Output(framework::GradVarName(kParameters)); + auto p_names = op_desc.Input(kX); + auto pg_names = op_desc.Output(framework::GradVarName(kX)); for (size_t i = 0; i < p_names.size(); ++i) { auto &p_var = detail::Ref(block->FindVarRecursive(p_names[i])); @@ -306,21 +307,21 @@ class WhileGradOpVarTypeInference : public framework::VarTypeInference { class WhileGradOpShapeInference : public framework::InferShapeBase { public: void operator()(framework::InferShapeContext *ctx) const override { - ctx->HasInputs(kParameters); - ctx->HasOutputs(framework::GradVarName(kParameters)); + ctx->HasInputs(kX); + ctx->HasOutputs(framework::GradVarName(kX)); ctx->HasInputs(kOutputs); ctx->HasInputs(framework::GradVarName(kOutputs)); - auto p_names = ctx->Inputs(kParameters); - auto pg_names = ctx->Outputs(kParamGrads); - auto var_types = ctx->GetInputsVarType(kParameters); + auto p_names = ctx->Inputs(kX); + auto pg_names = ctx->Outputs(kXGRAD); + auto var_types = ctx->GetInputsVarType(kX); std::vector names_to_set; std::vector dims_to_set; for (size_t i = 0; i < p_names.size(); ++i) { if (pg_names[i] == framework::kEmptyVarName) { continue; } - auto dims = ctx->GetInputsElementDim(kParameters, i); + auto dims = ctx->GetInputsElementDim(kX, i); if (var_types[i] == framework::proto::VarDesc::LOD_TENSOR) { names_to_set.push_back(pg_names[i]); dims_to_set.push_back(dims); diff --git a/paddle/optimizer/adadelta_optimizer.cc b/paddle/optimizer/adadelta_optimizer.cc index 5cc7c47d4486c3d149c37fd6e312780f3d44eda8..8ca048257e53af21f327ee1b8f24b9e2e5d54060 100644 --- a/paddle/optimizer/adadelta_optimizer.cc +++ b/paddle/optimizer/adadelta_optimizer.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "adadelta_optimizer.h" #include diff --git a/paddle/optimizer/adadelta_optimizer.h b/paddle/optimizer/adadelta_optimizer.h index 6aab1ad553b15ebbd2d04c9323c5e56e1b8f60f5..48f1ae175041cc1cfa11787c44317dd173430928 100644 --- a/paddle/optimizer/adadelta_optimizer.h +++ b/paddle/optimizer/adadelta_optimizer.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/optimizer/adagrad_optimizer.cc b/paddle/optimizer/adagrad_optimizer.cc index c981996bab1b2e7ae5d6e2d858a73efde12e32f3..c6d39a366adcd40e7298e1f1bed48c15f517a217 100644 --- a/paddle/optimizer/adagrad_optimizer.cc +++ b/paddle/optimizer/adagrad_optimizer.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include diff --git a/paddle/optimizer/adagrad_optimizer.h b/paddle/optimizer/adagrad_optimizer.h index 447b7c7547d5bad7436df6f3b3582b4a219f08c8..b0cff061f5c311d2356bab4965ec65c5eca3dbb8 100644 --- a/paddle/optimizer/adagrad_optimizer.h +++ b/paddle/optimizer/adagrad_optimizer.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/optimizer/adam_optimizer.cc b/paddle/optimizer/adam_optimizer.cc index 6dc2d749708d0e2a7f36734d89eec30d4576842e..8a384b59c47df321438e3a3bc245a2398a7ef6d1 100644 --- a/paddle/optimizer/adam_optimizer.cc +++ b/paddle/optimizer/adam_optimizer.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "adam_optimizer.h" #include diff --git a/paddle/optimizer/adam_optimizer.h b/paddle/optimizer/adam_optimizer.h index 37ab53afc37a5f749a2909de12c7871ed926583f..7df40064df31536cead9eba4ce827452cf0dce38 100644 --- a/paddle/optimizer/adam_optimizer.h +++ b/paddle/optimizer/adam_optimizer.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/optimizer/optimizer.cc b/paddle/optimizer/optimizer.cc index faa23764522cef03bae1359adbf58d10ee7809ac..3af44484363654befe85c8511d7c0791d5a33ab8 100644 --- a/paddle/optimizer/optimizer.cc +++ b/paddle/optimizer/optimizer.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "optimizer.h" #include diff --git a/paddle/optimizer/optimizer.h b/paddle/optimizer/optimizer.h index e6fa12a4d250ccb078358704b0131942ea6ab039..516e612167f557f043d3fe29789bec4b73405755 100644 --- a/paddle/optimizer/optimizer.h +++ b/paddle/optimizer/optimizer.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/optimizer/parameter_optimizer.cc b/paddle/optimizer/parameter_optimizer.cc index da92c2d01cc2a27d1fadd51a338d23b01e0cb0bc..1603e5fdc8a69d8b117f02b9c950a5374625c632 100644 --- a/paddle/optimizer/parameter_optimizer.cc +++ b/paddle/optimizer/parameter_optimizer.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include #include "adadelta_optimizer.h" diff --git a/paddle/optimizer/parameter_optimizer.h b/paddle/optimizer/parameter_optimizer.h index 99d0416e751c4ca6695d6ed77396e18d48fc86b8..1f501c49e1d7a8018f3274ae1128ca230b9298bb 100644 --- a/paddle/optimizer/parameter_optimizer.h +++ b/paddle/optimizer/parameter_optimizer.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/optimizer/sgd_optimizer.cc b/paddle/optimizer/sgd_optimizer.cc index c150144ac24b8375d08691a98be680b6bf5d1e7f..ee80f543fc4c1d87ba56540fc581816df8a39793 100644 --- a/paddle/optimizer/sgd_optimizer.cc +++ b/paddle/optimizer/sgd_optimizer.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "sgd_optimizer.h" #include "serialization.h" diff --git a/paddle/optimizer/sgd_optimizer.h b/paddle/optimizer/sgd_optimizer.h index 0b1da0aa27d98e8d6a8d9fd7a1ebe355acb2a1f4..16a4df9973ef6a599a8ecee6be5903f13799771c 100644 --- a/paddle/optimizer/sgd_optimizer.h +++ b/paddle/optimizer/sgd_optimizer.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index 88df28a9668e5f354d115ff8ab32cb21e03aefb5..44f6d85cd1510f309595ca711de2e0f767219580 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -21,12 +21,21 @@ ELSE() set(GPU_CTX_DEPS) ENDIF() +IF(WITH_MKLDNN) + set(MKLDNN_CTX_DEPS mkldnn) +ELSE() + set(MKLDNN_CTX_DEPS) +ENDIF() + # memcpy deoends on device_context, here add deps individually for # avoiding cycle dependencies cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator - system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS}) -nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info) + system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}) +nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_info) nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) nv_test(transform_test SRCS transform_test.cu DEPS paddle_memory place device_context) nv_test(nccl_test SRCS nccl_test.cu DEPS dynload_cuda gpu_info device_context) + +cc_library(profiler SRCS profiler.cc DEPS device_context) +cc_test(profiler_test SRCS profiler_test.cc DEPS profiler) diff --git a/paddle/platform/call_once.h b/paddle/platform/call_once.h index d9f49527dcf150fcb35d3af512088f75dec0b5c6..00337a7f051758559a0f8012d8c78dbe8e3457a6 100644 --- a/paddle/platform/call_once.h +++ b/paddle/platform/call_once.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/platform/cuda_profiler.h b/paddle/platform/cuda_profiler.h index b6311cb23d695c3cd851bcca120c24cced7fdd62..67d5f626d41c0fae280062533d6c1d1074341164 100644 --- a/paddle/platform/cuda_profiler.h +++ b/paddle/platform/cuda_profiler.h @@ -22,23 +22,7 @@ namespace paddle { namespace platform { void CudaProfilerInit(std::string output_file, std::string output_mode, - std::vector config_flags) { - std::array buf; - std::string tmpl = "/tmp/cuda_profile_config.XXXXXX"; - PADDLE_ENFORCE_LT(tmpl.size(), buf.size()); - memcpy(buf.data(), tmpl.data(), tmpl.size()); - auto result = mktemp(buf.data()); - PADDLE_ENFORCE(strlen(result) != 0); - std::string config_file = result; - - { - std::ofstream ofs(config_file, std::ios::out | std::ios::trunc); - PADDLE_ENFORCE(ofs.is_open(), "ofstream: ", ofs.rdstate()); - for (const auto& line : config_flags) { - ofs << line << std::endl; - } - } - + std::string config_file) { PADDLE_ENFORCE(output_mode == "kvp" || output_mode == "csv"); cudaOutputMode_t mode = output_mode == "csv" ? cudaCSV : cudaKeyValuePair; PADDLE_ENFORCE( diff --git a/paddle/platform/device_context.cc b/paddle/platform/device_context.cc index dacee74fff369586c7ca2ff62cfe6aeebd8f39c7..9d9348079a0179418ab1a1474cfd8b69136f26b2 100644 --- a/paddle/platform/device_context.cc +++ b/paddle/platform/device_context.cc @@ -15,6 +15,41 @@ limitations under the License. */ namespace paddle { namespace platform { +DeviceContextPool* DeviceContextPool::pool = nullptr; + +const platform::DeviceContext* DeviceContextPool::Get( + const platform::Place& place) { + auto it = device_contexts_.find(place); + if (it == device_contexts_.end()) { + PADDLE_THROW( + "'Place' is not supported, Please re-compile with WITH_GPU " + "option"); + } + return it->second; +} + +DeviceContextPool::DeviceContextPool( + const std::vector& places) { + PADDLE_ENFORCE_GT(places.size(), 0); + for (size_t i = 0; i < places.size(); i++) { + if (platform::is_cpu_place(places[i])) { + device_contexts_.emplace(places[i], + new platform::CPUDeviceContext( + boost::get(places[i]))); + } else if (platform::is_gpu_place(places[i])) { +#ifdef PADDLE_WITH_CUDA + device_contexts_.emplace(places[i], + new platform::CUDADeviceContext( + boost::get(places[i]))); +#else + PADDLE_THROW( + "'CUDAPlace' is not supported, Please re-compile with WITH_GPU " + "option"); +#endif + } + } +} + CPUDeviceContext::CPUDeviceContext() { eigen_device_.reset(new Eigen::DefaultDevice()); } @@ -38,7 +73,7 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface { } ~EigenCudaStreamDevice() override {} - void Reinitialize(const cudaStream_t* cuda_stream, GPUPlace place) { + void Reinitialize(const cudaStream_t* cuda_stream, CUDAPlace place) { stream_ = cuda_stream; place_ = place; device_prop_ = &Eigen::m_deviceProperties[place.device]; @@ -77,14 +112,14 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface { } private: - GPUPlace place_; + CUDAPlace place_; const cudaStream_t* stream_; // not owned; const cudaDeviceProp* device_prop_; // not owned; mutable void* scratch_; mutable unsigned int* semaphore_; }; -CUDADeviceContext::CUDADeviceContext(GPUPlace place) : place_(place) { +CUDADeviceContext::CUDADeviceContext(CUDAPlace place) : place_(place) { SetDeviceId(place_.device); PADDLE_ENFORCE(cudaStreamCreate(&stream_)); eigen_stream_.reset(new EigenCudaStreamDevice()); @@ -92,15 +127,21 @@ CUDADeviceContext::CUDADeviceContext(GPUPlace place) : place_(place) { eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get())); PADDLE_ENFORCE(dynload::cublasCreate(&cublas_handle_)); PADDLE_ENFORCE(dynload::cublasSetStream(cublas_handle_, stream_)); - PADDLE_ENFORCE(dynload::cudnnCreate(&cudnn_handle_)); - PADDLE_ENFORCE(dynload::cudnnSetStream(cudnn_handle_, stream_)); + if (dynload::HasCUDNN()) { + PADDLE_ENFORCE(dynload::cudnnCreate(&cudnn_handle_)); + PADDLE_ENFORCE(dynload::cudnnSetStream(cudnn_handle_, stream_)); + } else { + cudnn_handle_ = nullptr; + } } CUDADeviceContext::~CUDADeviceContext() { SetDeviceId(place_.device); Wait(); PADDLE_ENFORCE(dynload::cublasDestroy(cublas_handle_)); - PADDLE_ENFORCE(dynload::cudnnDestroy(cudnn_handle_)); + if (cudnn_handle_ != nullptr) { + PADDLE_ENFORCE(dynload::cudnnDestroy(cudnn_handle_)); + } eigen_stream_.reset(); eigen_device_.reset(); PADDLE_ENFORCE(cudaStreamDestroy(stream_)); @@ -125,21 +166,69 @@ cudnnHandle_t CUDADeviceContext::cudnn_handle() const { return cudnn_handle_; } cudaStream_t CUDADeviceContext::stream() const { return stream_; } -CUDNNDeviceContext::CUDNNDeviceContext(CUDNNPlace place) - : CUDADeviceContext(place), place_(place) { - PADDLE_ENFORCE(dynload::cudnnCreate(&cudnn_handle_)); - PADDLE_ENFORCE(dynload::cudnnSetStream(cudnn_handle_, stream())); +#endif + +#ifdef PADDLE_WITH_MKLDNN +MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place) + : CPUDeviceContext(place), ready_(false) { + stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager)); + engine_.reset(new mkldnn::engine(mkldnn::engine::cpu, 0)); } -CUDNNDeviceContext::~CUDNNDeviceContext() { - SetDeviceId(place_.device); - Wait(); - PADDLE_ENFORCE(dynload::cudnnDestroy(cudnn_handle_)); +template +void MKLDNNDeviceContext::AddElement(const std::string& op_key, + const T& value) { + if (GetElement(op_key)) { + return; + } + GetElementPool().emplace(op_key, std::move(value)); +} + +template +const T& MKLDNNDeviceContext::GetElement(const std::string& op_key) const { + auto it = GetElementPool().find(op_key); + return it == GetElementPool().end() ? nullptr : it->second; +} + +template <> +const std::unordered_map>& +MKLDNNDeviceContext::GetElementPool() const { + return memory_pool_; +} + +template <> +const std::unordered_map>& +MKLDNNDeviceContext::GetElementPool() const { + return primitive_pool_; } -Place CUDNNDeviceContext::GetPlace() const { return CUDNNPlace(); } +template <> +const std::unordered_map>& +MKLDNNDeviceContext::GetElementPool() const { + return primitive_desc_pool_; +} -cudnnHandle_t CUDNNDeviceContext::cudnn_handle() const { return cudnn_handle_; } +void MKLDNNDeviceContext::Execute(bool block) { + if (pipeline_.empty()) { + return; + } + ResetStream(); + stream_->submit(pipeline_).wait(block); + ready_ = false; + pipeline_.clear(); +} + +void MKLDNNDeviceContext::ResetStream() { + if (ready_) { + return; + } + // TODO(TJ): change me when mkldnn have specific method to reset this state + stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager)); + ready_ = true; +} #endif diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index 6cc0508522a97f3097b30e3340e7413a7093714a..7a0040c9c229af79ea8be1049dfd6c0d1b4d19cf 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -11,8 +11,8 @@ limitations under the License. */ #pragma once -#include "paddle/platform/enforce.h" -#include "paddle/platform/place.h" +#include +#include #ifdef PADDLE_WITH_CUDA #include "paddle/platform/dynload/cublas.h" @@ -20,10 +20,17 @@ limitations under the License. */ #include "paddle/platform/gpu_info.h" #define EIGEN_USE_GPU #endif -#include + +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/platform/mkldnn_helper.h" +#endif + +#include "paddle/platform/enforce.h" #include "paddle/platform/place.h" #include "unsupported/Eigen/CXX11/Tensor" +#include "glog/logging.h" + namespace paddle { namespace platform { @@ -49,13 +56,21 @@ class CPUDeviceContext : public DeviceContext { std::unique_ptr eigen_device_; }; +template +struct DefaultDeviceContextType; + +template <> +struct DefaultDeviceContextType { + using TYPE = CPUDeviceContext; +}; + #ifdef PADDLE_WITH_CUDA class EigenCudaStreamDevice; class CUDADeviceContext : public DeviceContext { public: - explicit CUDADeviceContext(GPUPlace place); + explicit CUDADeviceContext(CUDAPlace place); virtual ~CUDADeviceContext(); /*! \brief Wait for all operations completion in the stream. */ @@ -77,7 +92,7 @@ class CUDADeviceContext : public DeviceContext { cudaStream_t stream() const; private: - GPUPlace place_; + CUDAPlace place_; std::unique_ptr eigen_device_; std::unique_ptr eigen_stream_; @@ -87,23 +102,107 @@ class CUDADeviceContext : public DeviceContext { cublasHandle_t cublas_handle_; }; -class CUDNNDeviceContext : public CUDADeviceContext { +template <> +struct DefaultDeviceContextType { + using TYPE = CUDADeviceContext; +}; + +#endif + +#ifdef PADDLE_WITH_MKLDNN +class MKLDNNDeviceContext : public CPUDeviceContext { public: - explicit CUDNNDeviceContext(CUDNNPlace place); - virtual ~CUDNNDeviceContext(); + explicit MKLDNNDeviceContext(CPUPlace place); - /*! \brief Return place in the device context. */ - Place GetPlace() const final; + /* \brief Add new element: memory, primitive or primitive desc */ + template + void AddElement(const std::string& op_key, const T& value); - /*! \brief Return cudnn handle in the device context. */ - cudnnHandle_t cudnn_handle() const; + /* \brief Get existed element: memory, primitive or primitive desc */ + template + const T& GetElement(const std::string& op_key) const; + + /* \brief Get element pool: memory, primitive or primitive desc pool */ + template + const std::unordered_map>& + GetElementPool() const; + + /* \brief Get the active engine */ + const MKLDNNEngine& engine() const { return *engine_; } + + /* \brief Submit primitive to pipeline */ + void Submit(const MKLDNNPrimitivePtr& p) { pipeline_.push_back(*p); } + + /*! \brief Execute all submitted primitives in pipeline */ + void Execute(bool block = true); + + protected: + /*! \brief Reset the stream to prepare next exectue */ + void ResetStream(); private: - cudnnHandle_t cudnn_handle_; - CUDNNPlace place_; + std::unordered_map> + memory_pool_; + std::unordered_map> + primitive_pool_; + std::unordered_map> + primitive_desc_pool_; + std::vector pipeline_; + MKLDNNStreamPtr stream_; + MKLDNNEnginePtr engine_; + bool ready_; }; - #endif +/*! \brief device context pool singleton */ +class DeviceContextPool { + public: + explicit DeviceContextPool(const std::vector& places); + + static DeviceContextPool& Instance() { + PADDLE_ENFORCE_NOT_NULL(pool, "Need to Create DeviceContextPool first!"); + return *pool; + } + + /*! \brief Create should only called by Init function */ + static DeviceContextPool& Init(const std::vector& places) { + if (pool == nullptr) { + pool = new DeviceContextPool(places); + } + return *pool; + } + + /*! \brief Return handle of single device context. */ + const platform::DeviceContext* Get(const platform::Place& place); + + template + const typename DefaultDeviceContextType::TYPE* GetByPlace( + const Place& place) { + return reinterpret_cast< + const typename DefaultDeviceContextType::TYPE*>(Get(place)); + } + + private: + static DeviceContextPool* pool; + constexpr static int LEFT_SHIFT = 8; + struct Hash { + std::hash hash_; + size_t operator()(const platform::Place& place) const { + int pre_hash = place.which() << LEFT_SHIFT; + if (platform::is_gpu_place(place)) { + pre_hash += boost::get(place).GetDeviceId(); + } + return hash_(pre_hash); + } + }; + std::unordered_map + device_contexts_; + DISABLE_COPY_AND_ASSIGN(DeviceContextPool); +}; + } // namespace platform } // namespace paddle diff --git a/paddle/platform/device_context_test.cc b/paddle/platform/device_context_test.cu similarity index 54% rename from paddle/platform/device_context_test.cc rename to paddle/platform/device_context_test.cu index 109c13a8812dffac10d202cbc9d85c4e601bf197..767fe9b24a5d51630397a864c98928de52d21f31 100644 --- a/paddle/platform/device_context_test.cc +++ b/paddle/platform/device_context_test.cu @@ -12,17 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/platform/device_context.h" #include "gtest/gtest.h" +#include "paddle/platform/device_context.h" + +#include "glog/logging.h" TEST(Device, Init) { using paddle::platform::DeviceContext; using paddle::platform::CUDADeviceContext; - using paddle::platform::GPUPlace; + using paddle::platform::CUDAPlace; int count = paddle::platform::GetCUDADeviceCount(); for (int i = 0; i < count; i++) { - CUDADeviceContext* device_context = new CUDADeviceContext(GPUPlace(i)); + CUDADeviceContext* device_context = new CUDADeviceContext(CUDAPlace(i)); Eigen::GpuDevice* gpu_device = device_context->eigen_device(); ASSERT_NE(nullptr, gpu_device); delete device_context; @@ -31,11 +33,11 @@ TEST(Device, Init) { TEST(Device, CUDADeviceContext) { using paddle::platform::CUDADeviceContext; - using paddle::platform::GPUPlace; + using paddle::platform::CUDAPlace; int count = paddle::platform::GetCUDADeviceCount(); for (int i = 0; i < count; i++) { - CUDADeviceContext* device_context = new CUDADeviceContext(GPUPlace(i)); + CUDADeviceContext* device_context = new CUDADeviceContext(CUDAPlace(i)); Eigen::GpuDevice* gpu_device = device_context->eigen_device(); ASSERT_NE(nullptr, gpu_device); cudnnHandle_t cudnn_handle = device_context->cudnn_handle(); @@ -47,18 +49,38 @@ TEST(Device, CUDADeviceContext) { } } -TEST(Device, CUDNNDeviceContext) { - using paddle::platform::CUDNNDeviceContext; - using paddle::platform::CUDNNPlace; - if (paddle::platform::dynload::HasCUDNN()) { - int count = paddle::platform::GetCUDADeviceCount(); - for (int i = 0; i < count; ++i) { - CUDNNDeviceContext* device_context = - new CUDNNDeviceContext(CUDNNPlace(i)); - cudnnHandle_t cudnn_handle = device_context->cudnn_handle(); - ASSERT_NE(nullptr, cudnn_handle); - ASSERT_NE(nullptr, device_context->stream()); - delete device_context; - } +TEST(Device, DeviceContextPool) { + using paddle::platform::DeviceContextPool; + using paddle::platform::CUDADeviceContext; + using paddle::platform::Place; + using paddle::platform::CPUPlace; + using paddle::platform::CUDAPlace; + + DeviceContextPool& pool = DeviceContextPool::Instance(); + auto cpu_dev_ctx1 = pool.Get(CPUPlace()); + auto cpu_dev_ctx2 = pool.Get(CPUPlace()); + ASSERT_EQ(cpu_dev_ctx2, cpu_dev_ctx1); + + std::vector gpu_places; + int count = paddle::platform::GetCUDADeviceCount(); + for (int i = 0; i < count; ++i) { + auto dev_ctx = pool.Get(CUDAPlace(i)); + ASSERT_NE(dev_ctx, nullptr); + } +} + +int main(int argc, char** argv) { + std::vector places; + + places.emplace_back(paddle::platform::CPUPlace()); + int count = paddle::platform::GetCUDADeviceCount(); + for (int i = 0; i < count; ++i) { + places.emplace_back(paddle::platform::CUDAPlace(i)); } + + VLOG(0) << " DeviceCount " << count; + paddle::platform::DeviceContextPool::Init(places); + + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); } diff --git a/paddle/platform/dynload/nccl.cc b/paddle/platform/dynload/nccl.cc index 91168f37effff3f8b864b6bb2ede070cb0a976fa..4cec829a8ad8994d4a7643613331881e3a397b9a 100644 --- a/paddle/platform/dynload/nccl.cc +++ b/paddle/platform/dynload/nccl.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/platform/dynload/nccl.h" diff --git a/paddle/platform/dynload/nccl.h b/paddle/platform/dynload/nccl.h index 11007c1031c6d224c475e9ef4f11e7797decd78e..6c776afc97a53c964f0bc2b2a8abf2c29f474d3f 100644 --- a/paddle/platform/dynload/nccl.h +++ b/paddle/platform/dynload/nccl.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once @@ -63,6 +63,8 @@ extern void LoadNCCLDSO(); __macro(ncclAllReduce); \ __macro(ncclBcast); \ __macro(ncclAllGather); \ + __macro(ncclGroupStart); \ + __macro(ncclGroupEnd); \ __macro(ncclReduce); \ __macro(ncclGetErrorString); diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index 5abd4d4a345ed2750231841325f2b19a2ee8c4c9..d1c7be0790b5e11d6273efe6c08cdb7bf22425c6 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -22,6 +22,7 @@ limitations under the License. */ #include #include +#include "paddle/platform/macros.h" #include "paddle/string/printf.h" #include "paddle/string/to_string.h" diff --git a/paddle/platform/for_range.h b/paddle/platform/for_range.h new file mode 100644 index 0000000000000000000000000000000000000000..694a66d9ac4eb6ad02daf1931806fa1287de7cab --- /dev/null +++ b/paddle/platform/for_range.h @@ -0,0 +1,85 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/platform/device_context.h" + +namespace paddle { +namespace platform { + +template +struct ForRange { + ForRange(const DeviceContext& dev_ctx, size_t limit); + + template + void operator()(Function func) const; +}; + +template <> +struct ForRange { + ForRange(const CPUDeviceContext& dev_ctx, size_t limit) : limit_(limit) {} + + template + void operator()(Function func) const { + for (size_t i = 0; i < limit_; ++i) { + func(i); + } + } + + size_t limit_; +}; + +#ifdef __NVCC__ +template +__global__ static void ForRangeElemwiseOpGridIsOne(Function func) { + size_t idx = static_cast(threadIdx.x); + func(idx); +} + +template +__global__ static void ForRangeElemwiseOp(Function func, int limit) { + size_t idx = static_cast(blockIdx.x * blockDim.x + threadIdx.x); + if (idx < limit) { + func(idx); + } +} + +template <> +struct ForRange { + ForRange(const CUDADeviceContext& dev_ctx, size_t limit) + : dev_ctx_(dev_ctx), limit_(static_cast(limit)) {} + + template + inline void operator()(Function func) const { + constexpr int num_threads = 1024; + int block_size = limit_ <= num_threads ? limit_ : num_threads; + int grid_size = (limit_ + num_threads - 1) / num_threads; + + if (grid_size == 1) { + ForRangeElemwiseOpGridIsOne<<<1, block_size, 0, dev_ctx_.stream()>>>( + func); + } else { + ForRangeElemwiseOp<<>>( + func, limit_); + } + } + + const CUDADeviceContext& dev_ctx_; + int limit_; +}; + +#endif + +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/mkldnn_helper.h b/paddle/platform/mkldnn_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..cd52a8b4c434071a030a8e7a8a70fc3adba8460c --- /dev/null +++ b/paddle/platform/mkldnn_helper.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +namespace paddle { +namespace platform { + +using MKLDNNStream = mkldnn::stream; +using MKLDNNEngine = mkldnn::engine; +using MKLDNNMemory = mkldnn::memory; +using MKLDNNPrimitive = mkldnn::primitive; +using MKLDNNPrimitiveDesc = mkldnn::handle; + +typedef std::unique_ptr MKLDNNStreamPtr; +typedef std::unique_ptr MKLDNNEnginePtr; +typedef std::unique_ptr MKLDNNMemoryPtr; +typedef std::unique_ptr MKLDNNPrimitivePtr; +typedef std::unique_ptr MKLDNNPrimitiveDescPtr; + +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/nccl_test.cu b/paddle/platform/nccl_test.cu index 94ab360a1967d22e73bc6aefc0301487537c97f7..ef6d845874745af1150e4425f8d6be416cc44ece 100644 --- a/paddle/platform/nccl_test.cu +++ b/paddle/platform/nccl_test.cu @@ -1,28 +1,30 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include #include "glog/logging.h" #include "gtest/gtest.h" + +#include "paddle/framework/init.h" #include "paddle/platform/device_context.h" #include "paddle/platform/dynload/nccl.h" #include "paddle/platform/enforce.h" #include "paddle/platform/gpu_info.h" -#include -#include -#include - static int dev_count = 0; namespace paddle { @@ -31,7 +33,8 @@ namespace platform { TEST(NCCL, init) { std::vector comms; comms.resize(dev_count); - dynload::ncclCommInitAll(comms.data(), dev_count, nullptr); + PADDLE_ENFORCE(dynload::ncclCommInitAll(comms.data(), dev_count, nullptr)); + for (int i = 0; i < dev_count; ++i) { dynload::ncclCommDestroy(comms[i]); } @@ -47,7 +50,7 @@ struct PerThreadData { T* RecvBuff() { return thrust::raw_pointer_cast(recv_buff.data()); } - PerThreadData(int gpu_id, size_t size) : dev_ctx(GPUPlace(gpu_id)) { + PerThreadData(int gpu_id, size_t size) : dev_ctx(CUDAPlace(gpu_id)) { send_buff.resize(size); for (size_t i = 0; i < size; ++i) { send_buff[i] = static_cast(i); @@ -131,6 +134,18 @@ int main(int argc, char** argv) { << dev_count; return 0; } + + std::vector places; + + places.emplace_back(paddle::platform::CPUPlace()); + int count = paddle::platform::GetCUDADeviceCount(); + for (int i = 0; i < count; ++i) { + places.emplace_back(paddle::platform::CUDAPlace(i)); + } + + VLOG(0) << " DeviceCount " << count; + paddle::platform::DeviceContextPool::Init(places); + testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } diff --git a/paddle/platform/place.cc b/paddle/platform/place.cc index 25fe8d21b49b07a6afe2938245906dc1bdd90398..249527e3e136992970033c44ad490a1744bfed35 100644 --- a/paddle/platform/place.cc +++ b/paddle/platform/place.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/platform/place.h" @@ -23,8 +23,9 @@ class PlacePrinter : public boost::static_visitor<> { public: explicit PlacePrinter(std::ostream &os) : os_(os) {} void operator()(const CPUPlace &) { os_ << "CPUPlace"; } - void operator()(const MKLDNNPlace &) { os_ << "MKLDNNPlace"; } - void operator()(const GPUPlace &p) { os_ << "GPUPlace(" << p.device << ")"; } + void operator()(const CUDAPlace &p) { + os_ << "CUDAPlace(" << p.device << ")"; + } private: std::ostream &os_; @@ -37,20 +38,14 @@ static Place the_default_place; void set_place(const Place &place) { the_default_place = place; } const Place &get_place() { return the_default_place; } -const GPUPlace default_gpu() { return GPUPlace(0); } +const CUDAPlace default_gpu() { return CUDAPlace(0); } const CPUPlace default_cpu() { return CPUPlace(); } -const MKLDNNPlace default_mkldnn() { return MKLDNNPlace(); } bool is_gpu_place(const Place &p) { - return boost::apply_visitor(IsGPUPlace(), p); -} -bool is_cpu_place(const Place &p) { - return !is_gpu_place(p) && !is_mkldnn_place(p); + return boost::apply_visitor(IsCUDAPlace(), p); } -bool is_mkldnn_place(const Place &p) { - return boost::apply_visitor(IsMKLDNNPlace(), p); -} +bool is_cpu_place(const Place &p) { return !is_gpu_place(p); } bool places_are_same_class(const Place &p1, const Place &p2) { return p1.which() == p2.which(); diff --git a/paddle/platform/place.h b/paddle/platform/place.h index ca98920d414bc87ce243995a42e5672d0e61e108..76b5c502cc48431a4e9b13b07505978884576e1d 100644 --- a/paddle/platform/place.h +++ b/paddle/platform/place.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include - +#include "paddle/platform/enforce.h" #include "paddle/platform/variant.h" namespace paddle { @@ -31,68 +31,64 @@ struct CPUPlace { inline bool operator!=(const CPUPlace &) const { return false; } }; -struct MKLDNNPlace { - MKLDNNPlace() {} - - // needed for variant equality comparison - inline bool operator==(const MKLDNNPlace &) const { return true; } - inline bool operator!=(const MKLDNNPlace &) const { return false; } -}; - -struct GPUPlace { - GPUPlace() : GPUPlace(0) {} - explicit GPUPlace(int d) : device(d) {} +struct CUDAPlace { + CUDAPlace() : CUDAPlace(0) {} + explicit CUDAPlace(int d) : device(d) {} inline int GetDeviceId() const { return device; } // needed for variant equality comparison - inline bool operator==(const GPUPlace &o) const { return device == o.device; } - inline bool operator!=(const GPUPlace &o) const { return !(*this == o); } + inline bool operator==(const CUDAPlace &o) const { + return device == o.device; + } + inline bool operator!=(const CUDAPlace &o) const { return !(*this == o); } int device; }; -struct CUDNNPlace : public GPUPlace { - CUDNNPlace() : GPUPlace() {} - explicit CUDNNPlace(int d) : GPUPlace(d) {} -}; - -struct IsGPUPlace : public boost::static_visitor { +struct IsCUDAPlace : public boost::static_visitor { bool operator()(const CPUPlace &) const { return false; } - bool operator()(const MKLDNNPlace &) const { return false; } - bool operator()(const GPUPlace &gpu) const { return true; } + bool operator()(const CUDAPlace &gpu) const { return true; } }; -struct IsMKLDNNPlace : public boost::static_visitor { - bool operator()(const MKLDNNPlace &) const { return true; } - bool operator()(const CPUPlace &) const { return false; } - bool operator()(const GPUPlace &) const { return false; } -}; - -// Define the max number of Place in bit length. i.e., the max number of places -// should be less equal than 2^(NUM_PLACE_TYPE_LIMIT_IN_BIT) -#define NUM_PLACE_TYPE_LIMIT_IN_BIT 4 - -typedef boost::variant Place; - -// static check number of place types is less equal than -// 2^(NUM_PLACE_TYPE_LIMIT_IN_BIT) -BOOST_MPL_ASSERT((boost::mpl::less_equal< - Place::types::size, - boost::mpl::long_<1 << NUM_PLACE_TYPE_LIMIT_IN_BIT>>)); +typedef boost::variant Place; void set_place(const Place &); const Place &get_place(); -const GPUPlace default_gpu(); +const CUDAPlace default_gpu(); const CPUPlace default_cpu(); -const MKLDNNPlace default_mkldnn(); bool is_gpu_place(const Place &); bool is_cpu_place(const Place &); -bool is_mkldnn_place(const Place &); bool places_are_same_class(const Place &, const Place &); std::ostream &operator<<(std::ostream &, const Place &); +template +struct PlaceVisitorWrapper + : public boost::static_visitor { + const Visitor &visitor_; + explicit PlaceVisitorWrapper(const Visitor &visitor) : visitor_(visitor) {} + + typename Visitor::result_type operator()(const CPUPlace &cpu) const { + return visitor_(cpu); + } + + typename Visitor::result_type operator()(const CUDAPlace &cuda) const { +#ifdef PADDLE_WITH_CUDA + return visitor_(cuda); +#else + PADDLE_THROW("Paddle is not compiled with CUDA. Cannot visit cuda device"); + return typename Visitor::result_type(); +#endif + } +}; + +template +typename Visitor::result_type VisitPlace(const Place &place, + const Visitor &visitor) { + return boost::apply_visitor(PlaceVisitorWrapper(visitor), place); +} + } // namespace platform } // namespace paddle diff --git a/paddle/platform/place_test.cc b/paddle/platform/place_test.cc index c536b59ed8f71bd078bd09c5bd5afeab74c71b28..4f1eba01df5531529ad3c79648b5e7f8651df619 100644 --- a/paddle/platform/place_test.cc +++ b/paddle/platform/place_test.cc @@ -4,45 +4,34 @@ TEST(Place, Equality) { paddle::platform::CPUPlace cpu; - paddle::platform::GPUPlace g0(0), g1(1), gg0(0); - paddle::platform::CUDNNPlace d0(0), d1(1), dd0(0); + paddle::platform::CUDAPlace g0(0), g1(1), gg0(0); EXPECT_EQ(cpu, cpu); EXPECT_EQ(g0, g0); EXPECT_EQ(g1, g1); EXPECT_EQ(g0, gg0); - EXPECT_EQ(d0, dd0); EXPECT_NE(g0, g1); - EXPECT_NE(d0, d1); EXPECT_TRUE(paddle::platform::places_are_same_class(g0, gg0)); EXPECT_FALSE(paddle::platform::places_are_same_class(g0, cpu)); - - EXPECT_TRUE(paddle::platform::is_gpu_place(d0)); - EXPECT_FALSE(paddle::platform::places_are_same_class(g0, d0)); } TEST(Place, Default) { EXPECT_TRUE(paddle::platform::is_gpu_place(paddle::platform::get_place())); EXPECT_TRUE(paddle::platform::is_gpu_place(paddle::platform::default_gpu())); EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::default_cpu())); - EXPECT_TRUE( - paddle::platform::is_mkldnn_place(paddle::platform::default_mkldnn())); + EXPECT_FALSE(paddle::platform::is_cpu_place(paddle::platform::get_place())); paddle::platform::set_place(paddle::platform::CPUPlace()); EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::get_place())); - - paddle::platform::set_place(paddle::platform::MKLDNNPlace()); - EXPECT_FALSE(paddle::platform::is_cpu_place(paddle::platform::get_place())); - EXPECT_TRUE(paddle::platform::is_mkldnn_place(paddle::platform::get_place())); } TEST(Place, Print) { { std::stringstream ss; - ss << paddle::platform::GPUPlace(1); - EXPECT_EQ("GPUPlace(1)", ss.str()); + ss << paddle::platform::CUDAPlace(1); + EXPECT_EQ("CUDAPlace(1)", ss.str()); } { std::stringstream ss; diff --git a/paddle/platform/profiler.cc b/paddle/platform/profiler.cc new file mode 100644 index 0000000000000000000000000000000000000000..4e89e5c600bf7f6e23faf8d62bc7c72f7ee8ca7a --- /dev/null +++ b/paddle/platform/profiler.cc @@ -0,0 +1,173 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/platform/profiler.h" + +namespace paddle { +namespace platform { + +// The profiler state, the initial value is ProfilerState::kDisabled +static ProfilerState g_state = ProfilerState::kDisabled; +// The thread local event list only can be accessed by the specific thread +// The thread index of each thread +static thread_local int32_t g_thread_id; +// The g_next_thread_id is a global counter for threads, by the g_thread_id and +// g_next_thread_id, we can know how many threads have created EventList. +static uint32_t g_next_thread_id = 0; +// The global mutex +static std::mutex g_all_event_lists_mutex; +// The total event lists of all threads +static std::list> g_all_event_lists; +// The thread local event list only can be accessed by the specific thread +static thread_local std::shared_ptr g_event_list; + +inline uint64_t GetTimeInNsec() { + using clock = std::conditional::type; + return std::chrono::duration_cast( + clock::now().time_since_epoch()) + .count(); +} + +Event::Event(EventKind kind, std::string name, uint32_t thread_id, + DeviceContext* dev_ctx) + : kind_(kind), + name_(std::move(name)), + thread_id_(thread_id), + has_cuda_(false) { +#ifdef PADDLE_WITH_CUDA + auto* cuda_dev_ctx = static_cast(dev_ctx); + if (cuda_dev_ctx) { + PADDLE_ENFORCE(cudaGetDevice(&device_)); + PADDLE_ENFORCE(cudaEventCreate(&event_)); + auto stream = cuda_dev_ctx->stream(); + PADDLE_ENFORCE(cudaEventRecord(event_, stream)); + has_cuda_ = true; + } +#endif + cpu_ns_ = GetTimeInNsec(); +} + +std::string Event::kind() const { + switch (kind_) { + case EventKind::kMark: + return "mark"; + case EventKind::kPushRange: + return "push"; + case EventKind::kPopRange: + return "pop"; + } + PADDLE_THROW("Unknown EventKind."); +} + +double Event::CpuElapsedUs(const Event& e) const { + return (e.cpu_ns_ - cpu_ns_) / (1000.0); +} + +double Event::CudaElapsedUs(const Event& e) const { +#ifdef PADDLE_WITH_CUDA + PADDLE_ENFORCE(e.has_cuda() && has_cuda()); + PADDLE_ENFORCE(e.device() == device()); + PADDLE_ENFORCE(cudaEventSynchronize(event_)); + PADDLE_ENFORCE(cudaEventSynchronize(e.event())); + float ms; + PADDLE_ENFORCE(cudaEventElapsedTime(&ms, event_, e.event())); + return ms * 1000.0; +#else + PADDLE_THROW("CUDA is not enabled"); +#endif +} + +#ifdef PADDLE_WITH_CUDA +static void ForEachDevice(std::function func) { + auto original_device = GetCurrentDeviceId(); + int count = GetCUDADeviceCount(); + for (int i = 0; i < count; i++) { + SetDeviceId(i); + func(i); + } + SetDeviceId(original_device); +} +#endif + +inline EventList& GetEventList() { + if (!g_event_list) { + std::lock_guard guard(g_all_event_lists_mutex); + g_event_list = std::make_shared(); + g_thread_id = g_next_thread_id++; + g_all_event_lists.emplace_front(g_event_list); + } + return *g_event_list; +} + +void Mark(const std::string& name, DeviceContext* dev_ctx) { + GetEventList().Record(EventKind::kMark, std::move(name), g_thread_id, + dev_ctx); +} + +RecordEvent::RecordEvent(const std::string& name, DeviceContext* dev_ctx) { + if (g_state == ProfilerState::kDisabled) return; + dev_ctx_ = dev_ctx; + GetEventList().Record(EventKind::kPushRange, std::move(name), g_thread_id, + dev_ctx_); +} + +RecordEvent::~RecordEvent() { + if (g_state == ProfilerState::kDisabled) return; + GetEventList().Record(EventKind::kPopRange, std::string(), g_thread_id, + dev_ctx_); +} + +void EnableProfiler(ProfilerState state) { + PADDLE_ENFORCE(state != ProfilerState::kDisabled, + "Can't enbale profling, since the input state is ", + "ProfilerState::kDisabled"); + PADDLE_ENFORCE(g_state == ProfilerState::kDisabled, + "The profiling state should be disabled when calling ", + "EnableProfiler."); + g_state = state; +#ifdef PADDLE_WITH_CUDA + if (g_state == ProfilerState::kCUDA) { + // Generate some dummy evenets first to reduce the startup overhead. + for (int i = 0; i < 5; i++) { + ForEachDevice([](int d) { + DeviceContext* dev_ctx = new CUDADeviceContext(CUDAPlace(d)); + Mark("_cuda_startup_", dev_ctx); + dev_ctx->Wait(); + }); + } + } +#endif + // Mark the profiling start. + Mark("_start_profiler_", nullptr); +} + +std::vector> DisableProfiler() { + PADDLE_ENFORCE(g_state != ProfilerState::kDisabled, + "Can't disable profiling, since it's not starting."); + // Mark the profiling stop. + Mark("_stop_profiler_", nullptr); + g_state = ProfilerState::kDisabled; + std::vector> result; + std::lock_guard guard(g_all_event_lists_mutex); + for (auto it = g_all_event_lists.begin(); it != g_all_event_lists.end(); + ++it) { + result.emplace_back((*it)->Reduce()); + } + return result; +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/profiler.h b/paddle/platform/profiler.h new file mode 100644 index 0000000000000000000000000000000000000000..47104ea9d08dad373888c0af463d2b1dbe72a269 --- /dev/null +++ b/paddle/platform/profiler.h @@ -0,0 +1,114 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include +#include +#include +#include "paddle/platform/device_context.h" + +namespace paddle { +namespace platform { + +enum EventKind { kMark, kPushRange, kPopRange }; + +class Event { + public: + // The DeviceContext is used to get the cuda stream. + // If CPU profiling mode, can pass nullptr. + Event(EventKind kind, std::string name, uint32_t thread_id, + DeviceContext* dev_ctx); + + std::string kind() const; + std::string name() const { return name_; } + bool has_cuda() const { return has_cuda_; } + +#ifdef PADDLE_WITH_CUDA + cudaEvent_t event() const { return event_; } + int device() const { return device_; } +#endif + + double CpuElapsedUs(const Event& e) const; + double CudaElapsedUs(const Event& e) const; + + private: + EventKind kind_; + std::string name_; + uint32_t thread_id_; + int64_t cpu_ns_; + bool has_cuda_; +#ifdef PADDLE_WITH_CUDA + cudaEvent_t event_ = nullptr; + int device_ = -1; +#endif +}; + +struct EventList { + constexpr static size_t kMB = 1024 * 1024; + constexpr static size_t kEventBlockSize = 16 * kMB; + constexpr static size_t kEventSize = sizeof(Event); + constexpr static size_t kEventAlign = alignof(Event); + constexpr static size_t kNumBlock = + kEventBlockSize / + ((kEventSize + kEventAlign - 1) / kEventAlign * kEventAlign); + + template + void Record(Args&&... args) { + if (event_blocks.empty() || event_blocks.front().size() == kNumBlock) { + event_blocks.emplace_front(); + event_blocks.front().reserve(kNumBlock); + } + event_blocks.front().emplace_back(std::forward(args)...); + } + + std::vector Reduce() { + std::vector result; + for (auto& block : event_blocks) { + result.insert(result.begin(), std::make_move_iterator(block.begin()), + std::make_move_iterator(block.end())); + } + event_blocks.clear(); + return result; + } + + std::forward_list> event_blocks; +}; + +enum ProfilerState { + kDisabled, // disabled state + kCPU, // CPU profiling state + kCUDA, // GPU profiling state +}; + +void Mark(const std::string& name, DeviceContext* dev_ctx); + +struct RecordEvent { + explicit RecordEvent(const std::string& name, DeviceContext* dev_ctx); + + ~RecordEvent(); + + // The device context is used by Event to get the current cuda stream. + DeviceContext* dev_ctx_; +}; + +// Enable the profiling function. +void EnableProfiler(ProfilerState state); + +// Return the event list of all threads. Asummed the returned value calls +// event_lists, event_lists[i][j] represents the j-th Event of i-th thread. +std::vector> DisableProfiler(); + +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/profiler_test.cc b/paddle/platform/profiler_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..47cf7be146121c54300df928fe329268ed975373 --- /dev/null +++ b/paddle/platform/profiler_test.cc @@ -0,0 +1,98 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/platform/profiler.h" +#include "gtest/gtest.h" + +TEST(Event, CpuElapsedTime) { + using paddle::platform::Event; + using paddle::platform::EventKind; + + Event start_event(EventKind::kPushRange, "test", 0, nullptr); + EXPECT_TRUE(start_event.has_cuda() == false); + int counter = 0; + while (counter != 1000) { + counter++; + } + Event stop_event(EventKind::kPopRange, "test", 0, nullptr); + EXPECT_GT(start_event.CpuElapsedUs(stop_event), 0); +} + +#ifdef PADDLE_WITH_CUDA +TEST(Event, CudaElapsedTime) { + using paddle::platform::DeviceContext; + using paddle::platform::CUDADeviceContext; + using paddle::platform::CUDAPlace; + using paddle::platform::Event; + using paddle::platform::EventKind; + + DeviceContext* dev_ctx = new CUDADeviceContext(CUDAPlace(0)); + Event start_event(EventKind::kPushRange, "test", 0, dev_ctx); + EXPECT_TRUE(start_event.has_cuda() == true); + int counter = 0; + while (counter != 1000) { + counter++; + } + Event stop_event(EventKind::kPopRange, "test", 0, dev_ctx); + EXPECT_GT(start_event.CudaElapsedUs(stop_event), 0); +} +#endif + +TEST(RecordEvent, RecordEvent) { + using paddle::platform::DeviceContext; + using paddle::platform::Event; + using paddle::platform::EventKind; + using paddle::platform::RecordEvent; + using paddle::platform::ProfilerState; + + ProfilerState state = ProfilerState::kCPU; + DeviceContext* dev_ctx = nullptr; +#ifdef PADDLE_WITH_CUDA + using paddle::platform::CUDADeviceContext; + using paddle::platform::CUDAPlace; + state = ProfilerState::kCUDA; + dev_ctx = + new paddle::platform::CUDADeviceContext(paddle::platform::CUDAPlace(0)); +#endif + EnableProfiler(state); + + for (int i = 1; i < 5; ++i) { + std::string name = "op_" + std::to_string(i); + RecordEvent record_event(name, dev_ctx); + int counter = 1; + while (counter != i * 1000) counter++; + } + std::vector> events = paddle::platform::DisableProfiler(); + int cuda_startup_count = 0; + int start_profiler_count = 0; + int stop_profiler_count = 0; + for (size_t i = 0; i < events.size(); ++i) { + for (size_t j = 0; j < events[i].size(); ++j) { + if (events[i][j].name() == "_cuda_startup_") ++cuda_startup_count; + if (events[i][j].name() == "_start_profiler_") ++start_profiler_count; + if (events[i][j].name() == "_stop_profiler_") ++stop_profiler_count; + if (events[i][j].name() == "push") { + EXPECT_EQ(events[i][j + 1].name(), "pop"); +#ifdef PADDLE_WITH_CUDA + EXPECT_GT(events[i][j].CudaElapsedUs(events[i][j + 1]), 0); +#else + EXPECT_GT(events[i][j].CpuElapsedUs(events[i][j + 1]), 0); +#endif + } + } + } + EXPECT_EQ(cuda_startup_count % 5, 0); + EXPECT_EQ(start_profiler_count, 1); + EXPECT_EQ(stop_profiler_count, 1); +} diff --git a/paddle/platform/transform.h b/paddle/platform/transform.h index 148ebaed3d893cd03df8cf27b1309d07afd9aa4a..a88902b164c7a705cae09662e724b35cd8c8b4bf 100644 --- a/paddle/platform/transform.h +++ b/paddle/platform/transform.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/platform/transform_test.cu b/paddle/platform/transform_test.cu index 464096111e4a85b8d64d9223bfb85a1d1d42fad4..af9204a0a7b6e09fcfdacb8ba985e269665b4034 100644 --- a/paddle/platform/transform_test.cu +++ b/paddle/platform/transform_test.cu @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include #include "paddle/memory/memcpy.h" @@ -49,7 +49,7 @@ TEST(Transform, CPUUnary) { TEST(Transform, GPUUnary) { using namespace paddle::platform; using namespace paddle::memory; - GPUPlace gpu0(0); + CUDAPlace gpu0(0); CUDADeviceContext ctx(gpu0); float cpu_buf[4] = {0.1, 0.2, 0.3, 0.4}; float* gpu_buf = static_cast(Alloc(gpu0, sizeof(float) * 4)); @@ -80,7 +80,7 @@ TEST(Transform, GPUBinary) { using namespace paddle::platform; using namespace paddle::memory; int buf[4] = {1, 2, 3, 4}; - GPUPlace gpu0(0); + CUDAPlace gpu0(0); CUDADeviceContext ctx(gpu0); int* gpu_buf = static_cast(Alloc(gpu0, sizeof(buf))); Copy(gpu0, gpu_buf, CPUPlace(), buf, sizeof(buf), ctx.stream()); diff --git a/paddle/platform/variant.h b/paddle/platform/variant.h index 284b4c42ac068b23737c12d3f147bbceb135dacc..ea6ef8fddf5171731fa1fd323983bf7d90ecdd9a 100644 --- a/paddle/platform/variant.h +++ b/paddle/platform/variant.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index 6afed7eec7001b646d55cef0bc3f59782b80b15f..7b374307071d2da91a677361b404448f1a3816b0 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -3,6 +3,9 @@ if(WITH_PYTHON) SRCS pybind.cc exception.cc protobuf.cc const_value.cc DEPS pybind python backward proto_desc paddle_memory executor prune init ${GLOB_OP_LIB}) + if(NOT APPLE AND NOT ANDROID) + target_link_libraries(paddle_pybind rt) + endif(NOT APPLE AND NOT ANDROID) endif(WITH_PYTHON) if(WITH_DOC) diff --git a/paddle/pybind/exception.cc b/paddle/pybind/exception.cc index ff79b12ee4b28c53ee04f4c170b5bca9ca28d14a..e29ac3ebab760a011a3798f8e4be46270d6b80cc 100644 --- a/paddle/pybind/exception.cc +++ b/paddle/pybind/exception.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/pybind/exception.h" diff --git a/paddle/pybind/exception.h b/paddle/pybind/exception.h index 70beac146046f74e23f747bab130483901a7d443..436ddd5707ace37a5668c8d4401c1bdcf2dadfe3 100644 --- a/paddle/pybind/exception.h +++ b/paddle/pybind/exception.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index f105370f226e2cceaac685f280d55134d4291028..564a3700011c1b0c294c27af4b86ab967a0f0e1e 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -171,12 +171,23 @@ void BindBlockDesc(py::module &m) { std::string name = byte_name; return self.HasVar(name); }) + .def("has_var_recursive", + [](BlockDesc &self, py::bytes byte_name) { + std::string name = byte_name; + return self.HasVarRecursive(name); + }) .def("find_var", [](BlockDesc &self, py::bytes byte_name) { std::string name = byte_name; return self.FindVar(name); }, py::return_value_policy::reference) + .def("find_var_recursive", + [](BlockDesc &self, py::bytes byte_name) { + std::string name = byte_name; + return self.FindVarRecursive(name); + }, + py::return_value_policy::reference) .def("all_vars", &BlockDesc::AllVars, py::return_value_policy::reference) .def("op_size", &BlockDesc::OpSize) .def("op", &BlockDesc::Op, py::return_value_policy::reference) @@ -204,8 +215,8 @@ void BindVarDsec(py::module &m) { .def("set_shape", &VarDesc::SetShape) .def("set_dtype", &VarDesc::SetDataType) .def("shape", &VarDesc::Shape, py::return_value_policy::reference) - .def("dtype", &VarDesc::GetDataType) - .def("lod_level", &VarDesc::GetLodLevel) + .def("dtype", &VarDesc::GetDataType, py::return_value_policy::reference) + .def("lod_level", &VarDesc::GetLoDLevel) .def("set_lod_level", &VarDesc::SetLoDLevel) .def("type", &VarDesc::GetType) .def("set_type", &VarDesc::SetType) @@ -236,14 +247,22 @@ void BindOpDesc(py::module &m) { .value("BLOCK", proto::AttrType::BLOCK); py::class_ op_desc(m, "OpDesc", ""); - op_desc.def("type", &OpDesc::Type) + op_desc + .def("__init__", [](OpDesc &self) { new (&self) OpDesc(); }, + py::return_value_policy::reference) + .def("copy_from", &OpDesc::CopyFrom) + .def("type", &OpDesc::Type) .def("set_type", &OpDesc::SetType) .def("input", &OpDesc::Input) .def("input_names", &OpDesc::InputNames) - .def("set_input", &OpDesc::SetInput) .def("output", &OpDesc::Output) .def("output_names", &OpDesc::OutputNames) + .def("set_input", &OpDesc::SetInput) .def("set_output", &OpDesc::SetOutput) + .def("input_arg_names", &OpDesc::InputArgumentNames) + .def("output_arg_names", &OpDesc::OutputArgumentNames) + .def("rename_input", &OpDesc::RenameInput) + .def("rename_output", &OpDesc::RenameOutput) .def("has_attr", &OpDesc::HasAttr) .def("attr_type", &OpDesc::GetAttrType) .def("attr_names", &OpDesc::AttrNames) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 2d7fe251416dce629dd0a2318aaa020ec9668d9b..5d170c66e97f56440968ba568167e6845631e1cc 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -78,8 +78,12 @@ PYBIND11_PLUGIN(core) { [](Tensor &self, const std::vector &dim) { self.Resize(make_ddim(dim)); }) + .def("set_layout", + [](Tensor &self, const std::string &layout) { + self.set_layout(StringToDataLayout(layout)); + }) .def("alloc_float", - [](Tensor &self, paddle::platform::GPUPlace &place) { + [](Tensor &self, paddle::platform::CUDAPlace &place) { self.mutable_data(place); }) .def("alloc_float", @@ -91,7 +95,7 @@ PYBIND11_PLUGIN(core) { self.mutable_data(place); }) .def("alloc_int", - [](Tensor &self, paddle::platform::GPUPlace &place) { + [](Tensor &self, paddle::platform::CUDAPlace &place) { self.mutable_data(place); }) .def("set", PyCPUTensorSetFromArray) @@ -265,23 +269,22 @@ All parameter, weight, gradient are variables in Paddle. } return ret_values; }); - m.def("get_grad_op_descs", - [](const OpDesc &op_desc, - const std::unordered_set &no_grad_set, - std::unordered_map &grad_to_var, - const std::vector &grad_sub_block) { - std::vector> grad_op_descs = - framework::OpInfoMap::Instance() - .Get(op_desc.Type()) - .GradOpMaker()(op_desc, no_grad_set, &grad_to_var, - grad_sub_block); - std::vector grad_op_desc_ptrs(grad_op_descs.size()); - std::transform( - grad_op_descs.begin(), grad_op_descs.end(), - grad_op_desc_ptrs.begin(), - [](std::unique_ptr &p) { return p.release(); }); - return grad_op_desc_ptrs; - }); + m.def( + "get_grad_op_desc", [](const OpDesc &op_desc, + const std::unordered_set &no_grad_set, + const std::vector &grad_sub_block) { + std::unordered_map grad_to_var; + std::vector> grad_op_descs = + framework::OpInfoMap::Instance() + .Get(op_desc.Type()) + .GradOpMaker()(op_desc, no_grad_set, &grad_to_var, + grad_sub_block); + std::vector grad_op_desc_ptrs(grad_op_descs.size()); + std::transform(grad_op_descs.begin(), grad_op_descs.end(), + grad_op_desc_ptrs.begin(), + [](std::unique_ptr &p) { return p.release(); }); + return std::make_pair(grad_op_desc_ptrs, grad_to_var); + }); m.def("prune", [](const ProgramDesc &origin, const std::vector> &targets) { ProgramDesc prog_with_targets(origin); @@ -297,6 +300,8 @@ All parameter, weight, gradient are variables in Paddle. InferenceOptimize(*(origin.Proto()), &pruned_desc); return new ProgramDesc(pruned_desc); }); + m.def("empty_var_name", []() { return framework::kEmptyVarName; }); + m.def("grad_var_suffix", []() { return framework::kGradVarSuffix; }); m.def_submodule( "var_names", "The module will return special predefined variable name in Paddle") @@ -310,10 +315,10 @@ All parameter, weight, gradient are variables in Paddle. return new paddle::platform::CPUDeviceContext(); }) .def_static("create", - [](paddle::platform::GPUPlace& place) + [](paddle::platform::CUDAPlace& place) -> paddle::platform::DeviceContext* { #ifndef PADDLE_WITH_CUDA - PADDLE_THROW("GPUPlace is not supported in CPU device."); + PADDLE_THROW("CUDAPlace is not supported in CPU device."); #else return new paddle::platform::CUDADeviceContext(place); #endif @@ -323,9 +328,9 @@ All parameter, weight, gradient are variables in Paddle. #ifdef PADDLE_WITH_CUDA py::class_(m, "Communicator").def(py::init<>()); #endif - py::class_(m, "GPUPlace") + py::class_(m, "CUDAPlace") .def(py::init()) - .def("__str__", string::to_string); + .def("__str__", string::to_string); py::class_(m, "CPUPlace") .def(py::init<>()) @@ -338,7 +343,7 @@ All parameter, weight, gradient are variables in Paddle. self = cpu_place; }) .def("set_place", - [](platform::Place &self, const platform::GPUPlace &gpu_place) { + [](platform::Place &self, const platform::CUDAPlace &gpu_place) { self = gpu_place; }); @@ -360,10 +365,10 @@ All parameter, weight, gradient are variables in Paddle. }) .def("run", [](OperatorBase &self, const Scope &scope, - const platform::DeviceContext &dev_ctx) { - self.Run(scope, dev_ctx); - dev_ctx.Wait(); - }) + const platform::CPUPlace &place) { self.Run(scope, place); }) + .def("run", + [](OperatorBase &self, const Scope &scope, + const platform::CUDAPlace &place) { self.Run(scope, place); }) .def("type", [](const OperatorBase &op) -> std::string { return op.Type(); }) .def("outputs", @@ -417,13 +422,20 @@ All parameter, weight, gradient are variables in Paddle. }); py::class_(m, "Executor") - .def(py::init &>()) + .def(py::init()) .def("run", &Executor::Run); m.def("unique_integer", UniqueIntegerGenerator); m.def("init_gflags", framework::InitGflags); + m.def("init_glog", framework::InitGLOG); m.def("init_devices", &framework::InitDevices); + m.def("use_cpu", framework::UseCPU); + m.def("use_mkldnn", framework::UseMKLDNN); + m.def("use_cuda", framework::UseCUDA); + m.def("use_cudnn", framework::UseCUDNN); + m.def("use_all", framework::UseALL); + m.def("is_compile_gpu", IsCompileGPU); m.def("set_feed_variable", framework::SetFeedVariable); m.def("get_fetch_variable", framework::GetFetchVariable); diff --git a/paddle/pybind/tensor_py.h b/paddle/pybind/tensor_py.h index 268a0f2fa386adf99f7ea1589ff1f301f943a68b..6b4290972bade585d1a0c2ae919a2e712bdf308c 100644 --- a/paddle/pybind/tensor_py.h +++ b/paddle/pybind/tensor_py.h @@ -1,22 +1,22 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include -#include "paddle/framework/executor.h" #include "paddle/framework/tensor.h" #include "paddle/memory/memcpy.h" +#include "paddle/platform/device_context.h" #include "pybind11/numpy.h" #include "pybind11/pybind11.h" @@ -63,24 +63,24 @@ struct CastToPyBufferImpl { auto *dst_ptr = static_cast(dst_tensor.mutable_data( tensor.dims(), platform::CPUPlace())); - framework::DeviceContextPool &pool = - framework::DeviceContextPool::Get(); + platform::DeviceContextPool &pool = + platform::DeviceContextPool::Instance(); auto dev_ctx = static_cast( - pool.Borrow(tensor.place())); + pool.Get(tensor.place())); paddle::platform::GpuMemcpyAsync( dst_ptr, src_ptr, sizeof(CUR_TYPE) * tensor.numel(), cudaMemcpyDeviceToHost, dev_ctx->stream()); #else - PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); + PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); #endif } else if (paddle::platform::is_cpu_place(tensor.place())) { dst_tensor = tensor; } - return py::buffer_info( - dst_tensor.mutable_data(dst_tensor.place()), - sizeof(CUR_TYPE), py::format_descriptor::format(), - (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides); + return py::buffer_info(dst_tensor.data(), sizeof(CUR_TYPE), + py::format_descriptor::format(), + (size_t)framework::arity(dst_tensor.dims()), + dims_outside, strides); } else { constexpr bool less = I + 1 < std::tuple_size>::value; return CastToPyBufferImpl()(tensor); @@ -128,7 +128,7 @@ template void PyCUDATensorSetFromArray( framework::Tensor &self, py::array_t array, - paddle::platform::GPUPlace &place) { + paddle::platform::CUDAPlace &place) { std::vector dims; dims.reserve(array.ndim()); for (size_t i = 0; i < array.ndim(); ++i) { @@ -138,9 +138,9 @@ void PyCUDATensorSetFromArray( self.Resize(framework::make_ddim(dims)); auto *dst = self.mutable_data(place); - framework::DeviceContextPool &pool = framework::DeviceContextPool::Get(); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto dev_ctx = - static_cast(pool.Borrow(place)); + static_cast(pool.Get(place)); paddle::platform::GpuMemcpyAsync(dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice, dev_ctx->stream()); } diff --git a/paddle/scripts/CMakeLists.txt b/paddle/scripts/CMakeLists.txt index a52f06fe497dac467e4ef2543ebda7a423ca326d..68cb5a19f99ab5148b04d193eb2356588bdc5a59 100644 --- a/paddle/scripts/CMakeLists.txt +++ b/paddle/scripts/CMakeLists.txt @@ -5,11 +5,3 @@ configure_file(submit_local.sh.in install(FILES ${CMAKE_CURRENT_BINARY_DIR}/paddle DESTINATION bin PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ) - -configure_file(tools/usage_stat/usage.sh - paddle_usage - @ONLY) - -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/paddle_usage DESTINATION opt/paddle/bin - PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ - GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index e43b9c218a3ecb9e7f20fb7e8b14a85a29947eef..f1e244772f3f9e636dc749a21dc965743aa642da 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -178,7 +178,7 @@ EOF # run paddle version to install python packages first RUN apt-get update &&\ ${NCCL_DEPS}\ - apt-get install -y wget python-pip dmidecode && pip install -U pip && \ + apt-get install -y wget python-pip dmidecode python-tk && pip install -U pip && \ pip install /*.whl; apt-get install -f -y && \ apt-get clean -y && \ rm -f /*.whl && \ @@ -193,6 +193,25 @@ EOF EOF } +function gen_capi_package() { + if [[ ${WITH_C_API} == "ON" ]]; then + install_prefix="/paddle/build/capi_output" + rm -rf $install_prefix + + make DESTDIR="$install_prefix" install + + if [[ ${WITH_MKL:-OFF} == "ON" ]]; then + find ./third_party/install -name 'libmklml_gnu.so' -exec cp {} $install_prefix/usr/local/lib \; + find ./third_party/install -name 'libmklml_intel.so' -exec cp {} $install_prefix/usr/local/lib \; + cp -P ./third_party/install/mkldnn/lib/* $install_prefix/usr/local/lib/ + fi + + find ./third_party/install -name 'libiomp5.so' -exec cp {} $install_prefix/usr/local/lib \; + cd $install_prefix/usr/local + ls | egrep -v "^Found.*item$" | xargs tar /paddle/build/paddle.tgz + fi +} + set -xe cmake_gen ${PYTHON_ABI:-""} @@ -200,6 +219,11 @@ run_build run_test gen_docs gen_dockerfile - -printf "If you need to install PaddlePaddle in develop docker image," -printf "please make install or pip install build/python/dist/*.whl.\n" +gen_capi_package + +if [[ ${WITH_C_API:-OFF} == "ON" ]]; then + printf "PaddlePaddle C-API libraries was generated on build/paddle.tgz\n" +else + printf "If you need to install PaddlePaddle in develop docker image," + printf "please make install or pip install build/python/dist/*.whl.\n" +fi diff --git a/paddle/scripts/submit_local.sh.in b/paddle/scripts/submit_local.sh.in index 43d2d1b410fa86dc0ab213cba0c2a488770ea1c7..8a352b0078d701f797f7202c85bd0e08201ac9b8 100755 --- a/paddle/scripts/submit_local.sh.in +++ b/paddle/scripts/submit_local.sh.in @@ -71,9 +71,7 @@ function threads_config() { # auto set OMP_NUM_THREADS and MKL_NUM_THREADS # according to trainer_count and total processors # only when MKL enabled - if [ "@WITH_MKL@" == "OFF" ]; then - return 0 - fi + # auto set OPENBLAS_NUM_THREADS when do not use MKL processors=`grep "processor" /proc/cpuinfo|sort -u|wc -l` trainers=`grep -Eo 'trainer_count.[0-9]+' <<< "$@" |grep -Eo '[0-9]+'|xargs` if [ -z $trainers ]; then @@ -83,12 +81,19 @@ function threads_config() { if [ $threads -eq 0 ]; then threads=1 fi - if [ -z "$OMP_NUM_THREADS" ]; then - export OMP_NUM_THREADS=$threads - fi - if [ -z "$MKL_NUM_THREADS" ]; then - export MKL_NUM_THREADS=$threads + if [ "@WITH_MKL@" == "ON" ]; then + if [ -z "$OMP_NUM_THREADS" ]; then + export OMP_NUM_THREADS=$threads + fi + if [ -z "$MKL_NUM_THREADS" ]; then + export MKL_NUM_THREADS=$threads + fi + else + if [ -z "$OPENBLAS_NUM_THREADS" ]; then + export OPENBLAS_NUM_THREADS=$threads + fi fi + } PADDLE_CONF_HOME="$HOME/.config/paddle" @@ -150,7 +155,7 @@ fi case "$1" in "train") threads_config $@ - # echo $OMP_NUM_THREADS $MKL_NUM_THREADS + # echo $OMP_NUM_THREADS $MKL_NUM_THREADS $OPENBLAS_NUM_THREADS ${DEBUGGER} $PADDLE_BIN_PATH/paddle_trainer ${@:2} ;; "merge_model") @@ -165,9 +170,6 @@ case "$1" in "make_diagram") python -m paddle.utils.make_model_diagram ${@:2} ;; - "usage") - $PADDLE_BIN_PATH/paddle_usage ${@:2} - ;; "version") version ;; diff --git a/paddle/scripts/tools/usage_stat/usage.sh b/paddle/scripts/tools/usage_stat/usage.sh deleted file mode 100755 index 7dbd1f58842f50ea1df0e2476c4a493569b1dda9..0000000000000000000000000000000000000000 --- a/paddle/scripts/tools/usage_stat/usage.sh +++ /dev/null @@ -1,168 +0,0 @@ -#!/bin/bash - -ARGPARSE=`getopt -o u:vin:l:e: --long git-user:,help,dry-run,task-name:,log-file:,exit-code: -- "$@"` -KEEP_ANONYMOUS="A_USER_DOES_NOT_TELL_US" -# paddle config home dir, same as paddle -PADDLE_CONF_HOME="$HOME/.config/paddle" -# api url, mirror url(s) will be append later -PD_URLS="http://api.paddlepaddle.org/version" - -usage() -{ - echo "Usage: `basename $0` [options]" - echo "Options:" - echo " -e, --exit-code=EXIT_CODE The train/predict process's exit code" - echo " -l, --log-file=LOG_FILE_PATH Read which log file to get the duration of process" - echo " -n, --task-name=TASK_NAME The name of demo or example" - echo " -u, --git-user=GITHUB_USER provide contact info, like username or email" - echo " -v, -i Verbose output and interact with user when necessary" - echo " --help display this help message" -} - -eval set -- "${ARGPARSE}" -while true; do - case "$1" in - -l|--log-file) - log_file=$2 - shift 2 - ;; - -e|--exit-code) - exit_code=$2 - shift 2 - ;; - -u|--git-user) - github_user=$2 - shift 2 - ;; - -n|--task-name) - task=$2 - shift 2 - ;; - -v|-i) - v=1 - shift - ;; - --dry-run) - dry_run=1 - shift - ;; - --) - shift - break - ;; - --help) - usage - exit 0 - ;; - *) - echo "Invalid option $1" - usage - exit 1 - ;; - esac -done - -# parse the log_file to get the time costs -if [ -s "${log_file}" ]; then - duration=`awk 'BEGIN{day=0;last_sec=0;min_sec=0;max_sec=0;} - {if(index($2,":")==3){ - t=substr($2,1,8); - sec=day*86400+substr(t,1,2)*3600+substr(t,4,2)*60+substr(t,7,2); - if(secsec){min_sec=sec;} - if(max_sec==0 || max_sec/dev/null` - git_url=`git config --get remote.origin.url 2>/dev/null` - if [ "`echo ${git_url} | cut -b 1-19`" = "https://github.com/" ]; then - # under a git url, like https://github.com/user_xxx/proj_yyy.git - if [ "${v}" = "1" ]; then echo " from github url..."; fi - github_user=`echo ${git_url} | cut -d "/" -f 4` - if [ "${github_user}" = "PaddlePaddle" ]; then - github_user= - fi - fi - if [ -n "${git_username}" -a -z "${github_user}" ]; then - if [ "${v}" = "1" ]; then echo " from global git username..."; fi - github_user=${git_username} - fi - fi -fi -# allow user to set the user name, if it's not found -if [ -z "${github_user}" -a "${v}" = "1" ]; then - read -p "Please input your github username or email, or just return to keep this feedback anonymous:" - github_user=${REPLY} - if [ -z "${github_user}" ]; then - # empty input, consider as one anonymous user - github_user="${KEEP_ANONYMOUS}" - fi -fi -if [ -n "${github_user}" -a -z "${dry_run}" ]; then - # valid user and not in dry-run mode, then save to cache - mkdir -p ${PADDLE_CONF_HOME} - echo "${github_user}" >${PADDLE_CONF_HOME}/github_user -fi -if [ "${v}" = "1" ]; then echo "username: ${github_user}"; fi -if [ "${github_user}" = "${KEEP_ANONYMOUS}" ]; then - # anonymous user should keep the var empty. - github_user= -fi - -# read local paddle version -paddle_version=`paddle version | grep PaddlePaddle | head -n1 | cut -d " " -f 2 | cut -d "," -f 1` -if [ "${v}" = "1" ]; then echo "version:${paddle_version}"; fi - -# read local system time -system_time=`date "+%Y%m%d%H%M%S"` -if [ "${v}" = "1" ]; then echo "system time:${system_time}"; fi - -# make empty job_name as default value. -if [ -z "${task}" ]; then - task="(unknown_task)" -fi -if [ "${v}" = "1" ]; then echo "task: ${task}"; fi - -# concat the curl command -params="content={\"data_type\":\"usage\",\ -\"system_time\":${system_time},\"paddle_version\":\"${paddle_version}\",\ -\"github_user\":\"${github_user}\",\"job_name\":\"${task}\",\ -\"duration\":${duration},\"exit_code\":\"${exit_code}\"\ -}&type=1" -curl_cmd_prefix="curl -m 5 -X POST -d ${params}\ - -b ${PADDLE_CONF_HOME}/paddle.cookie -c ${PADDLE_CONF_HOME}/paddle.cookie " - -if [ "${dry_run}" = "1" ]; then - first_url=`echo ${PD_URLS} | cut -d " " -f 1` - echo "(dry-run mode)curl command: ${curl_cmd_prefix} ${first_url}" - exit 0 -else - for u in ${PD_URLS}; do - curl_cmd="${curl_cmd_prefix} ${u}" - if [ "${v}" = "1" ]; then echo "run: ${curl_cmd}"; fi - ${curl_cmd} >/dev/null 2>&1 - if [ $? -eq 0 ]; then - if [ "${v}" = "1" ]; then echo "upload OK!"; fi - exit 0 - else - if [ "${v}" = "1" ]; then echo "upload failed...try next"; fi - fi - done - if [ "${v}" = "1" ]; then echo "all urls tried but all failed...exit"; fi - exit 1 -fi diff --git a/paddle/string/to_string.h b/paddle/string/to_string.h index 4f478b6a36b23bdba8ef3ddae94b3eadf18716c2..3b3bcc69a478045156225728236174fd601461dd 100644 --- a/paddle/string/to_string.h +++ b/paddle/string/to_string.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include diff --git a/paddle/string/to_string_test.cc b/paddle/string/to_string_test.cc index 971484dd0c073762e99f3926576eb21b96197769..4956bd96fad5fd1decaad0a367135cb7d7ecaf6e 100644 --- a/paddle/string/to_string_test.cc +++ b/paddle/string/to_string_test.cc @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #include "paddle/string/to_string.h" #include diff --git a/paddle/testing/CMakeLists.txt b/paddle/testing/CMakeLists.txt index 8132742749e4a622720c66692c8d09815714ebea..77f84cd43bdf35ae6f54b0db2b5f720d24872878 100644 --- a/paddle/testing/CMakeLists.txt +++ b/paddle/testing/CMakeLists.txt @@ -6,7 +6,6 @@ if(WITH_TESTING) add_library(paddle_test_util STATIC TestUtil.cpp) add_dependencies(paddle_test_util paddle_proto ${external_project_dependencies}) if(NOT MOBILE_INFERENCE) - add_library(paddle_gtest_main STATIC paddle_gtest_main.cc) - add_dependencies(paddle_gtest_main paddle_memory gtest gflags) + cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS init paddle_memory gtest gflags) endif() endif() diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc index a491322b7e533f7a9c263a249494440269391003..108ff335bf6b920c648d4bfebbd6a40ffb6fd939 100644 --- a/paddle/testing/paddle_gtest_main.cc +++ b/paddle/testing/paddle_gtest_main.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "gflags/gflags.h" #include "gtest/gtest.h" +#include "paddle/framework/init.h" #include "paddle/memory/memory.h" int main(int argc, char** argv) { @@ -32,8 +34,11 @@ int main(int argc, char** argv) { google::ParseCommandLineFlags(&new_argc, &new_argv_address, false); testing::InitGoogleTest(&argc, argv); paddle::memory::Used(paddle::platform::CPUPlace()); + std::vector devs = {"CPU"}; #ifdef PADDLE_WITH_CUDA - paddle::memory::Used(paddle::platform::GPUPlace(0)); + paddle::memory::Used(paddle::platform::CUDAPlace(0)); + devs.push_back("GPU:0"); #endif + paddle::framework::InitDevices(devs); return RUN_ALL_TESTS(); } diff --git a/paddle/trainer/TrainerConfigHelper.cpp b/paddle/trainer/TrainerConfigHelper.cpp index a0a365aa0bb0ac26939a02c1cd626d0c17c6a9fe..2b68d89e48a3efd5de205ce33643b7e6320a4303 100644 --- a/paddle/trainer/TrainerConfigHelper.cpp +++ b/paddle/trainer/TrainerConfigHelper.cpp @@ -29,6 +29,7 @@ DECLARE_bool(with_gpu); DECLARE_bool(parallel_nn); DECLARE_string(config_args); DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkl_packed); const char *kConfigParserModuleName = "paddle.trainer.config_parser"; const char *kConfigParserFuncName = "parse_config_and_serialize"; @@ -46,6 +47,7 @@ TrainerConfigHelper::TrainerConfigHelper(const std::string &configFilePath) << ",with_cost=" << FLAGS_with_cost << ",use_gpu=" << FLAGS_use_gpu << ",parallel_nn=" << FLAGS_parallel_nn << ",use_mkldnn=" << FLAGS_use_mkldnn + << ",use_mkl_packed=" << FLAGS_use_mkl_packed << ",cudnn_version=" << hl_get_cudnn_lib_version(); if (!FLAGS_config_args.empty()) { configArgs << "," << FLAGS_config_args; diff --git a/paddle/utils/Flags.cpp b/paddle/utils/Flags.cpp index 9a7dc0e35622383a190f8b3a80736e6b42c9c959..ea47cf23eb6e56082eeb92f3c6dff8d03be0d679 100644 --- a/paddle/utils/Flags.cpp +++ b/paddle/utils/Flags.cpp @@ -27,6 +27,13 @@ DEFINE_bool(use_mkldnn, false, "Default still keep use CPU training"); DEFINE_bool(use_mkldnn, false, "Only support CPU training"); #endif +#ifdef PADDLE_WITH_MKLML +// TODO(TJ): change to true when fully confirmed +DEFINE_bool(use_mkl_packed, false, "Whether to use MKL Packed Optimization"); +#else +DEFINE_bool(use_mkl_packed, false, "Not to use MKL Packed Optimization"); +#endif + DEFINE_bool(parallel_nn, false, "Whether to use multi-threads to calculate one neural network." diff --git a/paddle/utils/Flags.h b/paddle/utils/Flags.h index 1832bb515ec85df3d7733e01b063a01ad6a3b282..b64295bca09a199f24605a158d1d9db7e7d91660 100644 --- a/paddle/utils/Flags.h +++ b/paddle/utils/Flags.h @@ -41,3 +41,4 @@ DECLARE_string(predict_file); DECLARE_bool(prev_batch_state); DECLARE_string(init_model_path); DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkl_packed); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 239fe4204b20a37a0869ba1e0e99adf4293dac7e..4fdf4090212e31adcccf6b119c937e70d5cbf995 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -3622,8 +3622,13 @@ class ConcatenateLayer2(LayerBase): @config_layer('recurrent') class RecurrentLayer(LayerBase): + layer_type = 'recurrent' + def __init__(self, name, inputs, reversed=False, bias=True, **xargs): - super(RecurrentLayer, self).__init__(name, 'recurrent', 0, inputs, + use_mkl_packed = bool( + int(g_command_config_args.get("use_mkl_packed", 0))) + self.layer_type = 'mkl_packed_recurrent' if use_mkl_packed else 'recurrent' + super(RecurrentLayer, self).__init__(name, self.layer_type, 0, inputs, **xargs) config_assert(len(self.inputs) == 1, 'RecurrentLayer must have 1 input') input_layer = self.get_input_layer(0) diff --git a/python/paddle/trainer_config_helpers/attrs.py b/python/paddle/trainer_config_helpers/attrs.py index ecba87191045cff6c05014010e60575741238f8d..e6f87ce61b1d16d4f98f111626776aa52c2ec35b 100644 --- a/python/paddle/trainer_config_helpers/attrs.py +++ b/python/paddle/trainer_config_helpers/attrs.py @@ -58,12 +58,12 @@ def is_compatible_with(x, Type): class HookAttribute(object): """ - Hook Attribute object. As a member of ParameterAttribute class, the hook is an auxiliary operation that occurs + Hook Attribute object. As a member of ParameterAttribute class, the hook is an auxiliary operation that occurs during training process of a layer with parameters, such as img_conv layer, fc layer. - :param type: Hook type, currently supported types: + :param type: Hook type, currently supported types: 'pruning' : user specify a sparsity_ratio before training started, and the - network will prune the parameters based on the sparsity_ratio. + network will prune the parameters based on the sparsity_ratio. eg: The definition of Hook object can be hk = HookAttribute('pruning', 0.6) The specific usage can be paddle.layer.img_conv(input=img, filter_size=3, num_channels=3, num_filters=64, @@ -71,10 +71,10 @@ class HookAttribute(object): The pruning details can be found https://arxiv.org/pdf/1506.02626.pdf :type type: string - :param sparsity_ratio: Must be specified if hook type is 'pruning', + :param sparsity_ratio: Must be specified if hook type is 'pruning', it represents the ratio of the zero elements to be set by the Parameter. :type sparsity_ratio: float or None - + """ def __init__(self, type, sparsity_ratio=None): @@ -130,10 +130,12 @@ class ParameterAttribute(object): :param sparse_update: Enable sparse update for this parameter. It will enable both local and remote sparse update. :type sparse_update: bool + :param update_hooks: A HookAttribute object. + :type update_hooks: HookAttribute :param initializer: If not None, it should be a callable object which accepts a parameter name and returns numpy array for the initial value of the parameter - :param initializer: callable object + :type initializer: callable object """ def __init__(self, diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 7e118b24a4330af27ea6aa893fd87985b4443cdb..19e2ab1b7da7b1ceacd6842f2d74ac551497c77b 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -270,7 +270,7 @@ class LayerType(object): @staticmethod def is_layer_type(type_name): """ - If type_name is a layer type. + Whether type_name is a layer type. :param type_name: layer type name. Because layer type enumerations are strings. @@ -441,7 +441,7 @@ def full_matrix_projection(input, size=0, param_attr=None): with mixed_layer(size=100) as m: m += full_matrix_projection(input=layer) - 2. When used as an independant object like this, you must set the size: + 2. When used as an independent object like this, you must set the size: .. code-block:: python @@ -451,11 +451,11 @@ def full_matrix_projection(input, size=0, param_attr=None): :param input: The input of this layer. :type input: LayerOutput - :param size: The parameter size. Means the width of parameter. + :param size: The dimension of this layer. :type size: int - :param param_attr: Parameter config, None if use default. + :param param_attr: The parameter attribute. See ParameterAttribute for details. :type param_attr: ParameterAttribute - :return: A FullMatrixProjection Object. + :return: FullMatrixProjection Object. :rtype: FullMatrixProjection """ proj = FullMatrixProjection( @@ -468,12 +468,12 @@ def full_matrix_projection(input, size=0, param_attr=None): def trans_full_matrix_projection(input, size=0, param_attr=None): """ Different from full_matrix_projection, this projection performs matrix - multiplication, using transpose of weight. + multiplication, using the transpose of weight. .. math:: out.row[i] += in.row[i] * w^\mathrm{T} - :math:`w^\mathrm{T}` means transpose of weight. + :math:`w^\mathrm{T}` means the transpose of weight. The simply usage is: .. code-block:: python @@ -489,9 +489,9 @@ def trans_full_matrix_projection(input, size=0, param_attr=None): :type input: LayerOutput :param size: The parameter size. Means the width of parameter. :type size: int - :param param_attr: Parameter config, None if use default. + :param param_attr: The parameter attribute. See ParameterAttribute for details. :type param_attr: ParameterAttribute - :return: A TransposedFullMatrixProjection Object. + :return: TransposedFullMatrixProjection Object. :rtype: TransposedFullMatrixProjection """ proj = TransposedFullMatrixProjection( @@ -521,7 +521,7 @@ def table_projection(input, size=0, param_attr=None): with mixed_layer(size=100) as m: m += table_projection(input=layer) - 2. When used as an independant object like this, you must set the size: + 2. When used as an independent object like this, you must set the size: .. code-block:: python @@ -532,11 +532,11 @@ def table_projection(input, size=0, param_attr=None): :param input: The input of this layer, which must contains id fields. :type input: LayerOutput - :param size: The parameter size. Means the width of parameter. + :param size: The dimension of the output. :type size: int - :param param_attr: Parameter config, None if use default. + :param param_attr: The parameter attribute. See ParameterAttribute for details. :type param_attr: ParameterAttribute - :return: A TableProjection Object. + :return: TableProjection Object. :rtype: TableProjection """ proj = TableProjection( @@ -547,7 +547,7 @@ def table_projection(input, size=0, param_attr=None): def identity_projection(input, offset=None, size=None): """ - 1. IdentityProjection if offset=None. It performs: + 1. If offset=None, it performs IdentityProjection as follows: .. math:: out.row[i] += in.row[i] @@ -559,9 +559,8 @@ def identity_projection(input, offset=None, size=None): proj = identity_projection(input=layer) - 2. IdentityOffsetProjection if offset!=None. It likes IdentityProjection, - but layer size may be smaller than input size. - It select dimesions [offset, offset+layer_size) from input: + 2. If offset!=None, It executes IdentityOffsetProjection and takes the + elements of the input in the range [offset, offset+size) as output. .. math:: out.row[i] += in.row[i + \\textrm{offset}] @@ -573,14 +572,20 @@ def identity_projection(input, offset=None, size=None): proj = identity_projection(input=layer, offset=10) - Note that both of two projections should not have any parameter. + Note that neither of the projections have trainable parameter. :param input: The input of this layer. :type input: LayerOutput - :param offset: Offset, None if use default. + :param offset: The offset from the start of the input. The input's + elements in the range [offset, offset+size) will be + taken as output. If this parameter is not set or set + to None, the output will be the same as the input. :type offset: int - :return: A IdentityProjection or IdentityOffsetProjection object - :rtype: IdentityProjection or IdentityOffsetProjection + :param size: The dimension of this layer. It will be neglected + when offset is None or not set. + :type size: int + :return: IdentityProjection or IdentityOffsetProjection object + :rtype: IdentityProjection | IdentityOffsetProjection """ if offset is None: proj = IdentityProjection(input_layer_name=input.name) @@ -596,8 +601,8 @@ def identity_projection(input, offset=None, size=None): def slice_projection(input, slices): """ - slice_projection can slice the input value into multiple parts, - and then select some of them to merge into a new output. + slice_projection slices the input value into multiple parts, + then selects and merges some of them into a new output. .. math:: output = [input.slices()] @@ -608,15 +613,13 @@ def slice_projection(input, slices): proj = slice_projection(input=layer, slices=[(0, 10), (20, 30)]) - Note that slice_projection should not have any parameter. + Note that slice_projection has no trainable parameter. :param input: The input of this layer. :type input: LayerOutput - :param slices: An array of slice parameters. - Each slice contains the start and end offsets based - on the input. - :type slices: pair of int - :return: A SliceProjection object + :param slices: A list of start and end offsets of each slice. + :type slices: list of tuple + :return: SliceProjection object. :rtype: SliceProjection """ assert len(slices) >= 1 @@ -636,8 +639,7 @@ def slice_projection(input, slices): @wrap_param_attr_default() def scaling_projection(input, param_attr=None): """ - scaling_projection multiplies the input with a scalar parameter and add to - the output. + scaling_projection multiplies the input with a scalar parameter. .. math:: out += w * in @@ -650,9 +652,9 @@ def scaling_projection(input, param_attr=None): :param input: The input of this layer. :type input: LayerOutput - :param param_attr: Parameter config, None if use default. + :param param_attr: The parameter attribute. See ParameterAttribute for details. :type param_attr: ParameterAttribute - :return: A ScalingProjection object + :return: ScalingProjection object. :rtype: ScalingProjection """ proj = ScalingProjection(input_layer_name=input.name, **param_attr.attr) @@ -663,8 +665,8 @@ def scaling_projection(input, param_attr=None): @wrap_param_attr_default() def dotmul_projection(input, param_attr=None): """ - DotMulProjection with a layer as input. - It performs element-wise multiplication with weight. + DotMulProjection takes a layer as input and performs + element-wise multiplication with weight. .. math:: out.row[i] += in.row[i] .* weight @@ -679,9 +681,9 @@ def dotmul_projection(input, param_attr=None): :param input: The input of this layer. :type input: LayerOutput - :param param_attr: Parameter config, None if use default. + :param param_attr: The parameter attribute. See ParameterAttribute for details. :type param_attr: ParameterAttribute - :return: A DotMulProjection Object. + :return: DotMulProjection object. :rtype: DotMulProjection """ proj = DotMulProjection( @@ -698,7 +700,7 @@ def dotmul_operator(a=None, b=None, scale=1, **kwargs): out.row[i] += scale * (a.row[i] .* b.row[i]) where :math:`.*` means element-wise multiplication, and - scale is a config scalar, its default value is one. + scale is a config scalar, its default value is 1. The example usage is: @@ -706,13 +708,13 @@ def dotmul_operator(a=None, b=None, scale=1, **kwargs): op = dotmul_operator(a=layer1, b=layer2, scale=0.5) - :param a: Input layer1 + :param a: The first input of this layer. :type a: LayerOutput - :param b: Input layer2 + :param b: The second input of this layer. :type b: LayerOutput - :param scale: config scalar, default value is one. + :param scale: A scalar to scale the product. Its default value is 1. :type scale: float - :return: A DotMulOperator Object. + :return: DotMulOperator object. :rtype: DotMulOperator """ if 'x' in kwargs or 'y' in kwargs: @@ -738,28 +740,29 @@ def context_projection(input, """ Context Projection. - It just simply reorganizes input sequence, combines "context_len" sequence - to one context from context_start. "context_start" will be set to - -(context_len - 1) / 2 by default. If context position out of sequence + It just reorganizes input sequence, combines "context_len" elements of the + sequence to one context from context_start. "context_start" will be set to + -(context_len - 1) / 2 by default. When context position is out of sequence length, padding will be filled as zero if padding_attr = False, otherwise it is trainable. - For example, origin sequence is [A B C D E F G], context len is 3, then - after context projection and not set padding_attr, sequence will + For example, origin sequence is [A B C D E F G], context len is 3, padding_attr + is not set, then after context projection, sequence will be [ 0AB ABC BCD CDE DEF EFG FG0 ]. :param input: The input of this layer, which should be a sequence. :type input: LayerOutput - :param context_len: context length. + :param context_len: The length of the context. :type context_len: int - :param context_start: context start position. Default is + :param context_start: The start position of the context. The default value is -(context_len - 1)/2 :type context_start: int - :param padding_attr: Padding Parameter Attribute. If false, it means padding - always be zero. Otherwise Padding is learnable, and - parameter attribute is set by this parameter. + :param padding_attr: Parameter attribute of the padding. If the parameter is + set to False, padding will be zero. In other cases, the + padding is trainable, and its parameter attribute is set + by this parameter. :type padding_attr: bool | ParameterAttribute - :return: Projection + :return: Projection object. :rtype: Projection """ context_start = -( @@ -791,10 +794,9 @@ class MixedLayerType(LayerOutput): def __init__(self, name, size, act, bias_attr, layer_attr, parents=None): """ - Ctor. - :param name: layer name. + :param name: The name of this layer. :type name: basestring - :param size: layer size. + :param size: The dimension of this layer. :type size: int :param act: Activation type. :type act: BaseActivation @@ -802,8 +804,9 @@ class MixedLayerType(LayerOutput): whose type is not ParameterAttribute, no bias is defined. If the parameter is set to True, the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: Extra Layer Attribute. - :type layer_attr: ExtraLayerAttribute or None + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. + :type layer_attr: ExtraLayerAttribute | None """ LayerOutput.__init__( self, @@ -868,12 +871,12 @@ def mixed_layer(size=0, bias_attr=False, layer_attr=None): """ - Mixed Layer. A mixed layer will add all inputs together, then activate. - Each inputs is a projection or operator. + Mixed Layer. A mixed layer will add all inputs together, then activate the sum. + Each input is a projection or operator. There are two styles of usages. - 1. When not set inputs parameter, use mixed_layer like this: + 1. When the parameter input is not set, use mixed_layer like this: .. code-block:: python @@ -889,21 +892,21 @@ def mixed_layer(size=0, input=[full_matrix_projection(input=layer1), full_matrix_projection(input=layer2)]) - :param name: mixed layer name. Can be referenced by other layer. + :param name: The name of this layer. It is optional. :type name: basestring - :param size: layer size. + :param size: The dimension of this layer. :type size: int - :param input: The input of this layer. It is an optional parameter. If set, - then this function will just return layer's name. + :param input: The input of this layer. It is an optional parameter. :param act: Activation Type. LinearActivation is the default activation. :type act: BaseActivation :param bias_attr: The bias attribute. If the parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. If the parameter is set to True, the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: The extra layer config. Default is None. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute - :return: MixedLayerType object can add inputs or layer name. + :return: MixedLayerType object. :rtype: MixedLayerType """ @@ -938,14 +941,15 @@ def data_layer(name, size, depth=None, height=None, width=None, :param name: The name of this layer. :type name: basestring - :param size: Size of this data layer. + :param size: The dimension of this data layer. :type size: int - :param height: Height of this data layer, used for image + :param height: The height of the input image data. :type height: int | None - :param width: Width of this data layer, used for image + :param width: The width of the input image data. :type width: int | None - :param layer_attr: Extra Layer Attribute. - :type layer_attr: ExtraLayerAttribute. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. + :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput """ @@ -978,14 +982,15 @@ def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None): :param name: The name of this layer. It is optional. :type name: basestring - :param input: The input of this layer, which must be Index Data. + :param input: The input of this layer, whose type must be Index Data. :type input: LayerOutput - :param size: The embedding dimension. + :param size: The dimension of the embedding vector. :type size: int :param param_attr: The embedding parameter attribute. See ParameterAttribute for details. - :type param_attr: ParameterAttribute | None - :param layer_attr: Extra layer Config. Default is None. + :type param_attr: ParameterAttribute + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute | None :return: LayerOutput object. :rtype: LayerOutput @@ -1013,7 +1018,7 @@ def fc_layer(input, bias_attr=None, layer_attr=None): """ - Helper for declare fully connected layer. + The fully connected layer. The example usage is: @@ -1035,17 +1040,18 @@ def fc_layer(input, :type name: basestring :param input: The input of this layer. :type input: LayerOutput | list | tuple - :param size: The layer dimension. + :param size: The dimension of this layer. :type size: int :param act: Activation Type. TanhActivation is the default activation. :type act: BaseActivation - :param param_attr: The Parameter Attribute|list. + :param param_attr: The parameter attribute. See ParameterAttribute for details. :type param_attr: ParameterAttribute :param bias_attr: The bias attribute. If the parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. If the parameter is set to True, the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: Extra Layer config. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute | None :return: LayerOutput object. :rtype: LayerOutput @@ -1086,13 +1092,15 @@ def fc_layer(input, @wrap_name_default("print") def printer_layer(input, format=None, name=None): """ - Print the output value of input layers. This layer is useful for debugging. + Print the output value of the layers specified by the parameter input. + This layer is useful for debugging. :param name: The name of this layer. It is optional. :type name: basestring :param input: The input of this layer. :type input: LayerOutput | list | tuple - :return: LayerOutput + :return: LayerOutput object. + :rtype: LayerOutput """ if isinstance(input, LayerOutput): input = [input] @@ -1135,11 +1143,12 @@ def priorbox_layer(input, :param aspect_ratio: The aspect ratio. :type aspect_ratio: list :param variance: The bounding box variance. - :type min_size: The min size of the priorbox width/height. + :type min_size: The minimum size of the priorbox width/height. :param min_size: list - :type max_size: The max size of the priorbox width/height. Could be NULL. + :type max_size: The maximum size of the priorbox width/height. It could be NULL. :param max_size: list - :return: LayerOutput + :return: LayerOutput object. + :rtype: LayerOutput """ # plus one for ratio 1. num_filters = (len(aspect_ratio) * 2 + 1 + len(max_size)) * 4 @@ -1177,7 +1186,7 @@ def multibox_loss_layer(input_loc, :param name: The name of this layer. It is optional. :type name: basestring - :param input_loc: The input predict locations. + :param input_loc: The input predicted locations. :type input_loc: LayerOutput | List of LayerOutput :param input_conf: The input priorbox confidence. :type input_conf: LayerOutput | List of LayerOutput @@ -1189,13 +1198,15 @@ def multibox_loss_layer(input_loc, :type num_classes: int :param overlap_threshold: The threshold of the overlap. :type overlap_threshold: float - :param neg_pos_ratio: The ratio of the negative bbox to the positive bbox. + :param neg_pos_ratio: The ratio of the negative bounding box to + the positive bounding box. :type neg_pos_ratio: float - :param neg_overlap: The negative bbox overlap threshold. + :param neg_overlap: The negative bounding box overlap threshold. :type neg_overlap: float :param background_id: The background class index. :type background_id: int - :return: LayerOutput + :return: LayerOutput object. + :rtype: LayerOutput """ if isinstance(input_loc, LayerOutput): input_loc = [input_loc] @@ -1258,19 +1269,20 @@ def detection_output_layer(input_loc, :type input_conf: LayerOutput | List of LayerOutput. :param priorbox: The input priorbox location and the variance. :type priorbox: LayerOutput - :param num_classes: The number of the classification. + :param num_classes: The number of the classes. :type num_classes: int :param nms_threshold: The Non-maximum suppression threshold. :type nms_threshold: float - :param nms_top_k: The bbox number kept of the NMS's output + :param nms_top_k: The bounding boxes number kept of the NMS's output. :type nms_top_k: int - :param keep_top_k: The bbox number kept of the layer's output + :param keep_top_k: The bounding boxes number kept of the layer's output. :type keep_top_k: int - :param confidence_threshold: The classification confidence threshold + :param confidence_threshold: The classification confidence threshold. :type confidence_threshold: float :param background_id: The background class index. :type background_id: int - :return: LayerOutput + :return: LayerOutput object. + :rtype: LayerOutput """ if isinstance(input_loc, LayerOutput): input_loc = [input_loc] @@ -1326,7 +1338,7 @@ def roi_pool_layer(input, A layer used by Fast R-CNN to extract feature maps of ROIs from the last feature map. - :param name: The Layer Name. + :param name: The name of this layer. It is optional. :type name: basestring :param input: The input layer. :type input: LayerOutput. @@ -1338,9 +1350,10 @@ def roi_pool_layer(input, :type pooled_height: int :param spatial_scale: The spatial scale between the image and feature map. :type spatial_scale: float - :param num_channels: number of input channel. + :param num_channels: The number of the input channels. :type num_channels: int - :return: LayerOutput + :return: LayerOutput object. + :rtype: LayerOutput """ if num_channels is None: assert input.num_filters is not None @@ -1361,18 +1374,19 @@ def roi_pool_layer(input, @wrap_name_default("cross_channel_norm") def cross_channel_norm_layer(input, name=None, param_attr=None): """ - Normalize a layer's output. This layer is necessary for ssd. - This layer applys normalize across the channels of each sample to - a conv layer's output and scale the output by a group of trainable - factors which dimensions equal to the channel's number. + Normalize a layer's output. This layer is necessary for ssd. This + layer applys normalization across the channels of each sample to + a convolutional layer's output and scales the output by a group of + trainable factors whose dimensions equal to the channel's number. :param name: The name of this layer. It is optional. :type name: basestring :param input: The input of this layer. :type input: LayerOutput - :param param_attr: The Parameter Attribute|list. + :param param_attr: The parameter attribute. See ParameterAttribute for details. :type param_attr: ParameterAttribute - :return: LayerOutput + :return: LayerOutput object. + :rtype: LayerOutput """ assert input.num_filters is not None Layer( @@ -1413,12 +1427,9 @@ def pooling_layer(input, Pooling layer for sequence inputs, not used for Image. If stride > 0, this layer slides a window whose size is determined by stride, - and return the pooling value of the window as the output. Thus, a long sequence - will be shorten. - - The parameter stride specifies the intervals at which to apply the pooling - operation. Note that for sequence with sub-sequence, the default value - of stride is -1. + and returns the pooling value of the sequence in the window as the output. Thus, + a long sequence will be shortened. Note that for sequence with sub-sequence, the + default value of stride is -1. The example usage is: @@ -1435,16 +1446,16 @@ def pooling_layer(input, :type name: basestring :param input: The input of this layer. :type input: LayerOutput - :param pooling_type: Type of pooling, MaxPooling(default), AvgPooling, - SumPooling, SquareRootNPooling. + :param pooling_type: Type of pooling. MaxPooling is the default pooling. :type pooling_type: BasePoolingType | None :param stride: The step size between successive pooling regions. - :type stride: Int + :type stride: int :param bias_attr: The bias attribute. If the parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. If the parameter is set to True, the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any - :param layer_attr: The Extra Attributes for layer, such as dropout. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute | None :return: LayerOutput object. :rtype: LayerOutput @@ -6618,7 +6629,7 @@ def row_conv_layer(input, .. math:: r_{t,r} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}} - \quad \text{for} \quad (1 \leq i \leq d) + \quad \\text{for} \quad (1 \leq i \leq d) Note: The `context_len` is `k + 1`. That is to say, the lookahead step @@ -6767,7 +6778,7 @@ def gated_unit_layer(input, The gated unit layer implements a simple gating mechanism over the input. The input :math:`X` is first projected into a new space :math:`X'`, and it is also used to produce a gate weight :math:`\sigma`. Element-wise - product between :match:`X'` and :math:`\sigma` is finally returned. + product between :math:`X'` and :math:`\sigma` is finally returned. Reference: `Language Modeling with Gated Convolutional Networks @@ -7463,7 +7474,7 @@ def factorization_machine(input, Factorization Machine with the formula: .. math:: - y = \sum_{i=1}^{n-1}\sum_{j=i+1}^n\langle v_i, v_j \rangle x_i x_j + y = \sum_{i=1}^{n-1}\sum_{j=i+1}^n\langle v_i, v_j \\rangle x_i x_j Note: X is the input vector with size n. V is the factor matrix. Each row of V diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index 70f61e84997efdbe3d6f268d249be8bac15b9ecd..0de417df2cb942ce46ff8ac3acc61ae4999ed634 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -135,6 +135,8 @@ def init(**kwargs): cp.g_command_config_args['use_gpu'] = kwargs['use_gpu'] if 'use_mkldnn' in kwargs: cp.g_command_config_args['use_mkldnn'] = kwargs['use_mkldnn'] + if 'use_mkl_packed' in kwargs: + cp.g_command_config_args['use_mkl_packed'] = kwargs['use_mkl_packed'] assert 'parallel_nn' not in kwargs, ("currently 'parallel_nn' is not " "supported in v2 APIs.") diff --git a/python/paddle/v2/dataset/flowers.py b/python/paddle/v2/dataset/flowers.py index 634388094c804827657dc83d5c205e680625b156..7bdddeaabec733ef26b3f766c6437f5c53d65044 100644 --- a/python/paddle/v2/dataset/flowers.py +++ b/python/paddle/v2/dataset/flowers.py @@ -44,7 +44,7 @@ __all__ = ['train', 'test', 'valid'] DATA_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz' LABEL_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat' SETID_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/setid.mat' -DATA_MD5 = '52808999861908f626f3c1f4e79d11fa' +DATA_MD5 = '33bfc11892f1e405ca193ae9a9f2a118' LABEL_MD5 = 'e0620be6f572b9609742df49c70aed4d' SETID_MD5 = 'a5357ecc9cb78c4bef273ce3793fc85c' # In official 'readme', tstid is the flag of test data diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py index cfc1c886e1389c15e3f803c341b6f62dd7b4bf41..21ed7f7a5ce279f5bc65e5b008f14a1b0ff97343 100644 --- a/python/paddle/v2/dataset/imdb.py +++ b/python/paddle/v2/dataset/imdb.py @@ -23,10 +23,9 @@ Besides, this module also provides API for building dictionary. import paddle.v2.dataset.common import collections import tarfile -import Queue import re import string -import threading +import random __all__ = ['build_dict', 'train', 'test', 'convert'] @@ -74,47 +73,21 @@ def build_dict(pattern, cutoff): return word_idx -def reader_creator(pos_pattern, neg_pattern, word_idx, buffer_size): +def reader_creator(pos_pattern, neg_pattern, word_idx): UNK = word_idx[''] + INS = [] - qs = [Queue.Queue(maxsize=buffer_size), Queue.Queue(maxsize=buffer_size)] - - def load(pattern, queue): + def load(pattern, out, label): for doc in tokenize(pattern): - queue.put(doc) - queue.put(None) + out.append(([word_idx.get(w, UNK) for w in doc], label)) + + load(pos_pattern, INS, 0) + load(neg_pattern, INS, 1) + random.shuffle(INS) def reader(): - # Creates two threads that loads positive and negative samples - # into qs. - t0 = threading.Thread( - target=load, args=( - pos_pattern, - qs[0], )) - t0.daemon = True - t0.start() - - t1 = threading.Thread( - target=load, args=( - neg_pattern, - qs[1], )) - t1.daemon = True - t1.start() - - # Read alternatively from qs[0] and qs[1]. - i = 0 - doc = qs[i].get() - while doc != None: - yield [word_idx.get(w, UNK) for w in doc], i % 2 - i += 1 - doc = qs[i % 2].get() - - # If any queue is empty, reads from the other queue. - i += 1 - doc = qs[i % 2].get() - while doc != None: - yield [word_idx.get(w, UNK) for w in doc], i % 2 - doc = qs[i % 2].get() + for doc, label in INS: + yield doc, label return reader @@ -133,7 +106,7 @@ def train(word_idx): """ return reader_creator( re.compile("aclImdb/train/pos/.*\.txt$"), - re.compile("aclImdb/train/neg/.*\.txt$"), word_idx, 1000) + re.compile("aclImdb/train/neg/.*\.txt$"), word_idx) def test(word_idx): @@ -150,7 +123,7 @@ def test(word_idx): """ return reader_creator( re.compile("aclImdb/test/pos/.*\.txt$"), - re.compile("aclImdb/test/neg/.*\.txt$"), word_idx, 1000) + re.compile("aclImdb/test/neg/.*\.txt$"), word_idx) def word_dict(): diff --git a/python/paddle/v2/fluid/__init__.py b/python/paddle/v2/fluid/__init__.py index 471255ef50a3be4739a89efbd978cdb4304d992d..5e01b8719806f4bb0c0d985373a5f4b076e05bd5 100644 --- a/python/paddle/v2/fluid/__init__.py +++ b/python/paddle/v2/fluid/__init__.py @@ -1,3 +1,4 @@ +from __future__ import print_function # import all class inside framework into fluid module import framework from framework import * @@ -15,19 +16,19 @@ import backward import regularizer from param_attr import ParamAttr from data_feeder import DataFeeder -from core import LoDTensor, CPUPlace, GPUPlace +from core import LoDTensor, CPUPlace, CUDAPlace from distribute_transpiler import DistributeTranspiler import clip Tensor = LoDTensor __all__ = framework.__all__ + executor.__all__ + [ 'io', 'initializer', 'layers', 'nets', 'optimizer', 'backward', - 'regularizer', 'LoDTensor', 'CPUPlace', 'GPUPlace', 'Tensor', 'ParamAttr' + 'regularizer', 'LoDTensor', 'CPUPlace', 'CUDAPlace', 'Tensor', 'ParamAttr' 'DataFeeder', 'clip', 'DistributeTranspiler' ] -def __read_gflags_from_env__(): +def __bootstrap__(): """ Enable reading gflags from environment variables. @@ -36,11 +37,35 @@ def __read_gflags_from_env__(): """ import sys import core - read_env_flags = ['use_pinned_memory'] + import os + + try: + num_threads = int(os.getenv('OMP_NUM_THREADS', '1')) + except ValueError: + num_threads = 1 + + if num_threads > 1: + print( + 'WARNING: OMP_NUM_THREADS set to {0}, not 1. The computation ' + 'speed will not be optimized if you use data parallel. It will ' + 'fail if this PaddlePaddle binary is compiled with OpenBlas since' + ' OpenBlas does not support multi-threads.'.format(num_threads), + file=sys.stderr) + print('PLEASE USE OMP_NUM_THREADS WISELY.', file=sys.stderr) + + os.environ['OMP_NUM_THREADS'] = str(num_threads) + + read_env_flags = ['use_pinned_memory', 'check_nan_inf'] if core.is_compile_gpu(): read_env_flags.append('fraction_of_gpu_memory_to_use') core.init_gflags([sys.argv[0]] + ["--tryfromenv=" + ",".join(read_env_flags)]) + core.init_glog(sys.argv[0]) + + if core.is_compile_gpu(): + core.init_devices(["CPU", "GPU:0"]) + else: + core.init_devices(["CPU"]) -__read_gflags_from_env__() +__bootstrap__() diff --git a/python/paddle/v2/fluid/backward.py b/python/paddle/v2/fluid/backward.py index f188582178f667125ec95cd230100fdb10ce7e88..88fe19da5e2c2df7f7eed7b26261ec155f0013f7 100644 --- a/python/paddle/v2/fluid/backward.py +++ b/python/paddle/v2/fluid/backward.py @@ -1,54 +1,339 @@ from paddle.v2.fluid import framework as framework +from . import core +import collections -__all__ = ['append_backward_ops'] +__all__ = ['append_backward'] -def append_backward_ops(loss, parameter_list=None, no_grad_set=None): +def _rename_arg_(op_descs, old_name, new_name, begin_idx=None, end_idx=None): """ - Create and add gradient Operators in BlockDesc to compute - gradients of `loss` for parameters in parameter_list + Traverse all ops in op_descs[begin_idx : end_idx], + if any op has inputs/outputs named "old_name", rename it as 'new_name' + """ + if begin_idx is None: + begin_idx = 0 + if end_idx is None: + end_idx = len(op_descs) + for i in range(begin_idx, end_idx): + op_desc = op_descs[i] + if isinstance(op_desc, tuple): + op_desc = op_desc[0] + op_desc.rename_input(old_name, new_name) + op_desc.rename_output(old_name, new_name) + + +def _create_op_desc_(op_type, inputs, outputs, attrs): + """ + Create a C++ OpDesc object with specified inputs, outputs and attributes. + """ + op_desc = core.OpDesc() + op_desc.set_type(op_type) + for para, args in inputs.iteritems(): + op_desc.set_input(para, args) + for para, args in outputs.iteritems(): + op_desc.set_output(para, args) + for name, val in attrs.iteritems(): + if isinstance(val, framework.Block): + op_desc.set_block_attr(name, val.desc) + else: + op_desc.set_attr(name, val) + return op_desc + + +def _infer_var_data_type_(grad_var_name, block): + """ + Infer the data type of given grad variable + """ + grad_var = block.desc.find_var(grad_var_name.encode("ascii")) + fwd_name = _strip_grad_suffix_(grad_var_name.encode("ascii")) + if block.desc.has_var_recursive(fwd_name): + fwd_var = block.desc.find_var_recursive(fwd_name.encode("ascii")) + grad_var.set_dtype(fwd_var.dtype()) + else: + grad_var.set_dtype(core.DataType.FP32) + + +def _all_in_set_(cands, s): + """ + Test if all elements of 'cands' are in set 's' + """ + if len(cands) == 0: + return False + for c in cands: + if not c in s: + return False + return True + - :param loss: an variable generated by cost function. - :type loss: Variable - :param no_grad_set: variable that should not create gradient - :type no_grad_set: set - :param parameter_list: parameters that need to compute gradient and - update to optimize the lost. - :type: list - :return: list of (parameters, gradients) pair. - :rtype: list[Variable] +def _strip_grad_suffix_(name): + """ + Strip the grad suffix from the given varibale name + e.g. x@GRAD ==> x + y@GRAD@RENAME@1 ==> y + """ + pos = name.find(core.grad_var_suffix()) + return name[:pos] if pos != -1 else name + + +def _append_grad_suffix_(name): + """ + Append grad suffix to the given variable name + e.g. x ==> x@GRAD + """ + return name + core.grad_var_suffix() + + +def _addup_repetitive_outputs_(op_descs): + """ + In backward part, an variable may be the output of more than one ops. + In this case, the variable should be the accumulation of all the outputs. + `sum_op`s are added to implement the accumulate. + """ + pending_sum_ops = [] + var_rename_count = collections.defaultdict(int) + renamed_vars = collections.defaultdict(list) + for idx, op_desc in enumerate(op_descs): + for var_name in op_desc.input_arg_names(): + if len(renamed_vars[var_name]) > 1: + pending_sum_ops.append( + (_create_op_desc_("sum", {"X": renamed_vars[var_name]}, + {"Out": [var_name]}, {}), idx)) + renamed_vars[var_name] = [var_name] + for var_name in op_desc.output_arg_names(): + if var_name == core.empty_var_name( + ) or var_name in op_desc.input_arg_names(): + # empty variable or inplace op + continue + if len(renamed_vars[var_name]) == 0: + # it's the first time we get the variable + renamed_vars[var_name] = [var_name] + else: + if len(renamed_vars[var_name]) == 1: + new_name = var_name + "@RENAME@" + \ + str(var_rename_count[var_name]) + var_rename_count[var_name] += 1 + # rename original var_name + renamed_vars[var_name][0] = new_name + _rename_arg_(op_descs, var_name, new_name, 0, idx) + _rename_arg_(pending_sum_ops, var_name, new_name) + + new_name = var_name + "@RENAME@" + \ + str(var_rename_count[var_name]) + var_rename_count[var_name] += 1 + op_desc.rename_output(var_name, new_name) + renamed_vars[var_name].append(new_name) + for var_name, inputs in renamed_vars.iteritems(): + if len(inputs) > 1: + pending_sum_ops.append((_create_op_desc_( + "sum", {"X": inputs}, {"Out": [var_name]}, {}), len(op_descs))) + # sum_op descs are sorted according to their insert position + for p in reversed(pending_sum_ops): + op_descs.insert(p[1], p[0]) + + return op_descs + + +def _remove_no_grad_branch_(op_descs, no_grad_set): + """ + Remove unnecessary grad ops + A grad op can be removed in two cases: + 1. all outputs of the grad op are in 'no_grad_set' + 2. all grad inputs of the grad op are in 'no_grad_set' + """ + + def _op_can_be_removed_(op_desc, no_grad_set): + out_arg_names = op_desc.output_arg_names() + if len(out_arg_names) == 0 or _all_in_set_(out_arg_names, no_grad_set): + return True + if _all_in_set_( + filter(lambda name: name.find(core.grad_var_suffix()) != -1, + op_desc.input_arg_names()), no_grad_set): + no_grad_set.union(out_arg_names) + return True + return False + + # Remove ops whose outputs are all in no_grad_dict + op_descs = filter( + lambda op_desc: not _op_can_be_removed_(op_desc, no_grad_set), op_descs) + # Insert fill_zeros_like_op + to_insert = [] + for idx, op_desc in enumerate(op_descs): + for arg in op_desc.input_arg_names(): + if core.grad_var_suffix() in arg and arg in no_grad_set: + to_insert.append((_create_op_desc_("fill_zeros_like", { + "X": [_strip_grad_suffix_(arg)] + }, {"Y": [arg]}, {}), idx)) + + map(lambda p: op_descs.insert(p[1], p[0]), reversed(to_insert)) + + return op_descs + + +def _append_backward_ops_(target, + block, + target_block, + no_grad_dict, + grad_to_var, + callback=None): + """ + Create all grad ops, and insert them into given block + + Args: + target(Variable): the target variable of forward pass + block(Block): the block where forward ops are + target_block(Block): the block which is going to hold new generated grad ops + no_grad_dict(dict): + key(int) block index + val(set) a set of varibale names. These varibales have no gradient + grad_to_var(dict)(output argument): + key(str): grad variable name + val(str): corresponding forward variable name + """ + # grad_op_descs holds created grad_op, and will be appended to target_block + grad_op_descs = [] + program = block.program + for op in reversed(block.ops): + grad_sub_block_list = [] + # If the op has its own sub-block, deal with the sub-block first + if op.has_attr("sub_block"): + sub_block = program.block(op.block_attr("sub_block")) + grad_sub_block = program.create_block(parent_idx=sub_block.idx) + _append_backward_ops_(target, sub_block, grad_sub_block, + no_grad_dict, grad_to_var, callback) + grad_sub_block_list.append(grad_sub_block.desc) + + # Getting op's corresponding grad_op + grad_op_desc, op_grad_to_var = core.get_grad_op_desc( + op.desc, no_grad_dict[block.idx], grad_sub_block_list) + + grad_op_descs.extend(grad_op_desc) + grad_to_var.update(op_grad_to_var) + + grad_op_descs = _addup_repetitive_outputs_(grad_op_descs) + + grad_op_descs = _remove_no_grad_branch_(grad_op_descs, + no_grad_dict[block.idx]) + + if target_block.idx == 0: + grad_op_descs.insert( + 0, + _create_op_desc_("fill_constant", {}, { + "Out": [_append_grad_suffix_(target.name)] + }, {"shape": [1], + "value": 1.0, + "dtype": target.dtype})) + # append op_desc in grad_op_descs to target_block + for op_desc in grad_op_descs: + new_op_desc = target_block.desc.append_op() + new_op_desc.copy_from(op_desc) + + +def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map): + """ + Create new variables required by backward pass. + + Args: + block(Block): the block where new variables will be created + start_op_idx(int): Only variables required by ops in block.ops[start_op_idx : ] will be created + grad_to_var(dict): + key(str): grad variable name + val(str): corresponding forward variable name + In most cases, this dict is generated by _append_backward_ops_() + grad_info_map(dict)(output argument): + key(str): forward variable name + val(tuple): a tuple of (str, int), str is the corresponding grad name, int is the block index + """ + for op_idx in range(start_op_idx, block.desc.op_size()): + op_desc = block.desc.op(op_idx) + if op_desc.has_attr("sub_block"): + sub_block = block.program.block(op_desc.block_attr("sub_block")) + _append_backward_vars_(sub_block, 0, grad_to_var, grad_info_map) + new_vars = set() + # create new gradient variables + for grad_var_name in op_desc.output_arg_names(): + grad_var_name = grad_var_name.encode("ascii") + if block.desc.has_var_recursive( + grad_var_name) or grad_var_name == core.empty_var_name(): + continue + block.desc.var(grad_var_name) + new_vars.add(grad_var_name) + if not grad_to_var.has_key(grad_var_name): + continue + grad_info_map[grad_to_var[grad_var_name]] = (grad_var_name, block) + # infer_shape and infer_type + op_desc.infer_var_type(block.desc) + op_desc.infer_shape(block.desc) + for arg in op_desc.output_arg_names(): + if arg in new_vars: + _infer_var_data_type_(arg, block) + + +def append_backward(loss, parameter_list=None, no_grad_set=None): + """ + Append backward part to main_program + + Args: + loss(Variable): The variable generated by cost function. + parameter_list(list): Parameters that need to be updated by optimizer. + If None, it means all parameters need to be updated. + no_grad_set(set): Variables that have no gradients in Block 0. + If None, the set will be generated inside the function and + contains all variables with `step_gradient=True` from all blocks. + + Return: + (list[Variable]): list of (parameters, gradients) pair. """ assert isinstance(loss, framework.Variable) + program = loss.block.program + no_grad_dict = dict() if no_grad_set is None: - program = loss.block.program assert isinstance(program, framework.Program) - no_grad_set = list() for block in program.blocks: assert isinstance(block, framework.Block) + block_no_grad_set = set() for var in block.vars.itervalues(): assert isinstance(var, framework.Variable) if var.stop_gradient: - no_grad_set.append(var.name) - no_grad_set = set(no_grad_set) + block_no_grad_set.add(_append_grad_suffix_(var.name)) + no_grad_dict[block.idx] = block_no_grad_set + elif isinstance(no_grad_set, set): + no_grad_dict = { + 0: set([_append_grad_suffix_(name) for name in no_grad_set]) + } + else: + raise ValueError("'no_grad_set' should be a set or None.") + + grad_info_map = dict() + root_block = program.block(0) + + fwd_op_num = root_block.desc.op_size() + current_block_idx = program.current_block_idx + grad_to_var = dict() + + _append_backward_ops_(loss, root_block, root_block, no_grad_dict, + grad_to_var) + _append_backward_vars_(root_block, fwd_op_num, grad_to_var, grad_info_map) + + program.current_block_idx = current_block_idx + program.sync_with_cpp() - param_grad_map = loss.block.program.append_backward(loss, no_grad_set) if parameter_list is not None: parameters = parameter_list else: - params = loss.block.program.global_block().all_parameters() + params = program.global_block().all_parameters() parameters = [param.name for param in params] params_and_grads = [] for param in parameters: - if param not in param_grad_map: + if param not in grad_info_map: raise ValueError("param %s is not in map" % param) - grad_info = param_grad_map[param] - grad_block = loss.block.program.block(grad_info[1]) + grad_info = grad_info_map[param] + grad_block = grad_info[1] if not grad_block.has_var(grad_info[0]): raise ValueError("grad block[{0}] did not have grad var {1}".format( grad_info[1], grad_info[0])) # Get the param var from the global block - param_var = loss.block.program.global_block().var(param) + param_var = program.global_block().var(param) grad_var = grad_block.var(grad_info[0]) if loss.block.has_var(grad_info[0]): params_and_grads.append((param_var, grad_var)) diff --git a/python/paddle/v2/fluid/data_feeder.py b/python/paddle/v2/fluid/data_feeder.py index 30a542af212926c93381aade426e25f2117e4662..24036c3e75b9594ba58cccb02825ab8020d1e107 100644 --- a/python/paddle/v2/fluid/data_feeder.py +++ b/python/paddle/v2/fluid/data_feeder.py @@ -3,7 +3,7 @@ import core import numpy import six.moves as six -from framework import Variable +from framework import Variable, default_main_program __all__ = ['DataFeeder'] @@ -53,12 +53,16 @@ class DataToLoDTensorConverter(object): class DataFeeder(object): - def __init__(self, feed_list, place): + def __init__(self, feed_list, place, program=None): self.feed_dtypes = [] self.feed_names = [] self.feed_shapes = [] self.feed_lod_level = [] + if program is None: + program = default_main_program() for each_var in feed_list: + if isinstance(each_var, basestring): + each_var = program.block(0).var(each_var) if not isinstance(each_var, Variable): raise TypeError("Feed list should contain a list of variable") self.feed_dtypes.append(each_var.dtype) diff --git a/python/paddle/v2/fluid/distribute_transpiler.py b/python/paddle/v2/fluid/distribute_transpiler.py index 50364c64bec46aabc7be4f4b4370a3ad5b0eb07c..49ece7b725e318d7526d58fe54c97cbe20200a7d 100644 --- a/python/paddle/v2/fluid/distribute_transpiler.py +++ b/python/paddle/v2/fluid/distribute_transpiler.py @@ -95,7 +95,9 @@ class DistributeTranspiler: """ if program is None: program = default_main_program() + self.program = program self.trainers = trainers + self.optimize_ops = optimize_ops self._optimize_distributed( optimize_ops, program, @@ -141,22 +143,25 @@ class DistributeTranspiler: self.param_grad_map = split_method(params_and_grads, pserver_endpoints) send_op_ordered_inputs = [] + send_op_ordered_outputs = [] epmap = [] for ep, v in self.param_grad_map.iteritems(): send_op_ordered_inputs.extend(v["grads"]) + send_op_ordered_outputs.extend(v["params"]) for i in v["grads"]: epmap.append(ep) send_op = program.global_block().append_op( type="send", inputs={"X": send_op_ordered_inputs }, # inputs is a list of tensors to be send - outputs={}, + outputs={"Out": send_op_ordered_outputs}, attrs={"endpoints": pserver_endpoints, "epmap": epmap}) - def get_trainer_program(optimize_ops, program): + def get_trainer_program(self): # remove optimize ops and add a send op to main_program - program.global_block().delete_ops(optimize_ops) + self.program.global_block().delete_ops(self.optimize_ops) + return self.program def _create_var_for_trainers(self, block, var, trainers): var_list = [] @@ -208,7 +213,6 @@ class DistributeTranspiler: if opt_op.inputs.has_key("Grad"): if opt_op.inputs["Grad"].name in grad_var_names: - print "appending ", opt_op.type, opt_op.inputs optimize_sub_program.global_block().append_op( type=opt_op.type, inputs=opt_op.inputs, diff --git a/python/paddle/v2/fluid/executor.py b/python/paddle/v2/fluid/executor.py index 4b4a0820abb9a85f3e9936190c835c2f186107b3..1b2075dcd5ece5706e62431b360d4dc86ea57a89 100644 --- a/python/paddle/v2/fluid/executor.py +++ b/python/paddle/v2/fluid/executor.py @@ -1,12 +1,31 @@ import numpy as np +import contextlib +from framework import Program, default_main_program from . import core -from framework import Program, default_main_program, Parameter, Variable -__all__ = ['Executor', 'g_scope'] +__all__ = ['Executor', 'global_scope', 'scope_guard', 'switch_scope'] g_scope = core.Scope() +def global_scope(): + return g_scope + + +def switch_scope(scope): + global g_scope + ex = g_scope + g_scope = scope + return ex + + +@contextlib.contextmanager +def scope_guard(scope): + ex = switch_scope(scope) + yield + switch_scope(ex) + + def as_numpy(tensor): if isinstance(tensor, list): return [as_numpy(t) for t in tensor] @@ -46,14 +65,8 @@ class Executor(object): p.set_place(each) act_places.append(p) - # TODO(dzhwinter) : consider that our fluid tests all written in - # GPUPlace(gpu_id), this will be changed in next PR. - if core.is_compile_gpu(): - core.init_devices(["CPU", "GPU:0"]) - else: - core.init_devices(["CPU"]) - - self.executor = core.Executor(act_places) + # TODO(dzhwinter) : only use the first place + self.executor = core.Executor(act_places[0]) self.places = places def aslodtensor(self, data): @@ -116,7 +129,7 @@ class Executor(object): raise TypeError() if scope is None: - scope = g_scope + scope = global_scope() program = program.clone() global_block = program.global_block() diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index efc05f6563f274bda0072dfde077768da28a0f22..d5ad8bb51380d4c2c71de6374f498e5355c8a65d 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -448,7 +448,7 @@ class Operator(object): no_kernel_op_set = { 'feed', 'fetch', 'save', 'load', 'recurrent', 'rnn_memory_helper_grad', 'conditional_block', 'while', 'send', - 'recv', 'get_places' + 'recv', 'get_places', 'parallel_do' } if type not in no_kernel_op_set: self.desc.infer_var_type(self.block.desc) @@ -659,7 +659,7 @@ class Block(object): end = list(self.ops).index(ops[-1]) except Exception, e: raise e - self.desc.remove_op(start, end) + self.desc.remove_op(start, end + 1) def prepend_op(self, *args, **kwargs): op_desc = self.desc.prepend_op() @@ -842,9 +842,11 @@ class Program(object): self.sync_with_cpp() return param_to_grad_info - def create_block(self): + def create_block(self, parent_idx=None): new_block_idx = len(self.blocks) - self.desc.append_block(self.current_block().desc) + parent = self.current_block() if parent_idx is None else self.block( + parent_idx) + self.desc.append_block(parent.desc) self.current_block_idx = new_block_idx self.blocks.append(Block(self, self.current_block_idx)) return self.current_block() diff --git a/python/paddle/v2/fluid/io.py b/python/paddle/v2/fluid/io.py index e147ac22ad289eb00c83def66974d875fcdc31f8..c63567601accd8c072368351f2838857bb61c818 100644 --- a/python/paddle/v2/fluid/io.py +++ b/python/paddle/v2/fluid/io.py @@ -36,7 +36,7 @@ def save_vars(executor, dirname, main_program=None, vars=None, predicate=None): :param executor: executor that save variable :param dirname: directory path :param main_program: program. If vars is None, then filter all variables in this - program which fit `predicate`. Default g_program. + program which fit `predicate`. Default default_main_program. :param predicate: The Predicate describes a callable that returns a variable as a bool. If it returns true, the variables will be saved. :param vars: variables need to be saved. If specify vars, program & predicate @@ -180,10 +180,22 @@ def save_inference_model(dirname, :return: None """ + if isinstance(feeded_var_names, basestring): + feeded_var_names = [feeded_var_names] + else: + if not (bool(feeded_var_names) and all( + isinstance(name, basestring) for name in feeded_var_names)): + raise ValueError("'feed_var_names' should be a list of str.") + + if isinstance(target_vars, Variable): + target_vars = [target_vars] + else: + if not (bool(target_vars) and all( + isinstance(var, Variable) for var in target_vars)): + raise ValueError("'target_vars' should be a list of Variable.") + if main_program is None: main_program = default_main_program() - if not isinstance(target_vars, list): - target_vars = [target_vars] if not os.path.isdir(dirname): os.makedirs(dirname) @@ -200,6 +212,11 @@ def save_inference_model(dirname, "fetch_var_names": fetch_var_names }, f, -1) + # Save only programDesc of inference_program in binary format + # in another file: __model__.dat + with open(model_file_name + ".dat", "wb") as fp: + fp.write(inference_program.desc.serialize_to_string()) + save_params(executor, dirname, main_program) diff --git a/python/paddle/v2/fluid/layer_helper.py b/python/paddle/v2/fluid/layer_helper.py index a076f26f7ff279812f762bd62f72195dc2376378..4469f7285efe1c31d0955c6dd4ba3ecac08070af 100644 --- a/python/paddle/v2/fluid/layer_helper.py +++ b/python/paddle/v2/fluid/layer_helper.py @@ -184,7 +184,7 @@ class LayerHelper(object): self.append_op( type=act_type, inputs={"X": [input_var]}, - outputs={"Y": [tmp]}, + outputs={"Out": [tmp]}, attrs=act) return tmp diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py index 22a37c22c3fc777cadcdee6632bbf1fb558fef70..948a67524490ec7dcf84ef9af48be54f0fc0f908 100644 --- a/python/paddle/v2/fluid/layers/control_flow.py +++ b/python/paddle/v2/fluid/layers/control_flow.py @@ -6,16 +6,47 @@ import contextlib from ..registry import autodoc __all__ = [ - 'split_lod_tensor', 'merge_lod_tensor', 'BlockGuard', 'StaticRNNGuard', - 'StaticRNNMemoryLink', 'WhileGuard', 'While', 'lod_rank_table', - 'max_sequence_len', 'topk', 'lod_tensor_to_array', 'array_to_lod_tensor', - 'increment', 'array_write', 'create_array', 'less_than', 'array_read', - 'shrink_memory', 'array_length', 'IfElse', 'DynamicRNN', 'ConditionalBlock', - 'StaticRNN', 'reorder_lod_tensor_by_rank' + 'split_lod_tensor', 'merge_lod_tensor', 'BlockGuard', + 'BlockGuardWithCompletion', 'StaticRNNMemoryLink', 'WhileGuard', 'While', + 'lod_rank_table', 'max_sequence_len', 'topk', 'lod_tensor_to_array', + 'array_to_lod_tensor', 'increment', 'array_write', 'create_array', + 'less_than', 'array_read', 'shrink_memory', 'array_length', 'IfElse', + 'DynamicRNN', 'ConditionalBlock', 'StaticRNN', 'reorder_lod_tensor_by_rank', + 'ParallelDo' ] def split_lod_tensor(input, mask, level=0): + """ + **split_lod_tensor** + + This function takes in an input that contains the complete lod information, + and takes in a mask which is used to mask certain parts of the input. + The output is the true branch and the false branch with the mask applied to + the input at a certain level in the tensor. + + Args: + input(tuple|list|None): The input tensor that contains complete + lod information needed to construct the output. + mask(list): A bool column vector which masks the input. + level(int): The specific lod level to rank. + + Returns: + Variable: The true branch of tensor as per the mask applied to input. + Variable: The false branch of tensor as per the mask applied to input. + + Examples: + .. code-block:: python + + x = layers.data(name='x', shape=[1]) + x.persistable = True + + y = layers.data(name='y', shape=[1]) + y.persistable = True + + out_true, out_false = layers.split_lod_tensor( + input=x, mask=y, level=level) + """ helper = LayerHelper('split_lod_tensor', **locals()) out_true = helper.create_tmp_variable(dtype=input.dtype) out_false = helper.create_tmp_variable(dtype=input.dtype) @@ -32,6 +63,40 @@ def split_lod_tensor(input, mask, level=0): def merge_lod_tensor(in_true, in_false, x, mask, level=0): + """ + **merge_lod_tensor** + + This function takes in an input :math:`x`, the True branch, the False + branch and a binary :math:`mask`. Using this information, this function + merges the True and False branches of the tensor into a single Output + at a certain lod level indiacted by :math:`level`. + + Args: + in_true(tuple|list|None): The True branch to be merged. + in_false(tuple|list|None): The False branch to be merged. + x(tuple|list|None): The input tensor that contains complete + lod information needed to construct the output. + mask(list): A bool column vector which masks the input. + level(int): The specific lod level to rank. + + Returns: + Variable: The merged output tensor. + + Examples: + .. code-block:: python + + x = layers.data( + name='x', shape=[1], dtype='float32', stop_gradient=False) + y = layers.data( + name='y', shape=[1], dtype='bool', stop_gradient=False) + + level = 0 + + out_true, out_false = layers.split_lod_tensor( + input=x, mask=y, level=level) + out = layers.merge_lod_tensor( + in_true=out_true, in_false=out_false, mask=y, x=x, level=level) + """ helper = LayerHelper('merge_lod_tensor', **locals()) out = helper.create_tmp_variable(dtype=in_true.dtype) helper.append_op( @@ -68,29 +133,129 @@ class BlockGuard(object): return True -class StaticRNNGuard(BlockGuard): +class ParallelDo(object): + """ + ParallelDo class. + + ParallelDo class is used to create a ParallelDo. + """ + + def __init__(self, places, name=None): + self.helper = LayerHelper("parallel_do", name=name) + self.inputs = [] + self.places = places + self.outputs = [] + self.status = StaticRNN.BEFORE_RNN_BLOCK + + def do(self): + return BlockGuardWithCompletion(self) + + def parent_block(self): + prog = self.helper.main_program + parent_idx = prog.current_block().parent_idx + assert parent_idx >= 0 + parent_block = prog.block(parent_idx) + return parent_block + + def __call__(self, *args, **kwargs): + if self.status != StaticRNN.AFTER_RNN_BLOCK: + raise ValueError("RNN output can only be retrieved after rnn block") + if len(self.outputs) == 0: + raise ValueError("RNN has no output") + elif len(self.outputs) == 1: + return self.outputs[0] + else: + return self.outputs + + def read_input(self, var): + self.inputs.append(var) + return var + + def write_output(self, var): + self.outputs.append(var) + + def get_parameters(self): + main_program = self.helper.main_program + current_block = main_program.current_block() + parent_block = self.parent_block() + + local_inputs = set() + + for op in current_block.ops: + for oname in op.output_names: + for out_var_name in op.output(oname): + local_inputs.add(out_var_name) + + for var in self.inputs: + local_inputs.add(var.name) + + params = list() + for op in current_block.ops: + for iname in op.input_names: + for in_var_name in op.input(iname): + if in_var_name not in local_inputs: + params.append(in_var_name) + + return [parent_block.var(name) for name in params] + + def complete_op(self): + main_program = self.helper.main_program + current_block = main_program.current_block() + parent_block = self.parent_block() + + step_scope = parent_block.create_var( + type=core.VarDesc.VarType.STEP_SCOPES) + + self.outputs = [ + parent_block.create_var( + name=o.name, + shape=o.shape, + dtype=o.dtype, + lod_level=o.lod_level, + persistable=o.persistable, + stop_gradient=o.stop_gradient) for o in self.outputs + ] + + inputs = [parent_block.var(i.name) for i in self.inputs] + outputs = [parent_block.var(o.name) for o in self.outputs] + + parent_block.append_op( + type='parallel_do', + inputs={ + 'inputs': inputs, + 'parameters': self.get_parameters(), + 'places': self.places + }, + outputs={'outputs': outputs, + 'parallel_scopes': [step_scope]}, + attrs={'sub_block': current_block}) + + +class BlockGuardWithCompletion(BlockGuard): """ - StaticRNNGuard class. + BlockGuardWithCompletion class. - StaticRNNGuard class is used to create a StaticRNN block in a program. + BlockGuardWithCompletion class is used to create an op with a block in a program. """ def __init__(self, rnn): - if not isinstance(rnn, StaticRNN): - raise TypeError("StaticRNNGuard takes a StaticRNN") - super(StaticRNNGuard, self).__init__(rnn.helper.main_program) + if not (isinstance(rnn, StaticRNN) or isinstance(rnn, ParallelDo)): + raise TypeError( + "BlockGuardWithCompletion takes a StaticRNN or ParallelDo") + super(BlockGuardWithCompletion, self).__init__(rnn.helper.main_program) self.rnn = rnn def __enter__(self): self.rnn.status = StaticRNN.IN_RNN_BLOCK - return super(StaticRNNGuard, self).__enter__() + return super(BlockGuardWithCompletion, self).__enter__() def __exit__(self, exc_type, exc_val, exc_tb): if exc_type is not None: return False self.rnn.status = StaticRNN.AFTER_RNN_BLOCK - self.rnn.complete_rnn_op() - return super(StaticRNNGuard, self).__exit__(exc_type, exc_val, exc_tb) + self.rnn.complete_op() + return super(BlockGuardWithCompletion, self).__exit__(exc_type, exc_val, + exc_tb) class StaticRNNMemoryLink(object): @@ -136,7 +301,7 @@ class StaticRNN(object): self.seq_len = None def step(self): - return StaticRNNGuard(self) + return BlockGuardWithCompletion(self) def _assert_in_rnn_block_(self, method): if self.status != StaticRNN.IN_RNN_BLOCK: @@ -252,7 +417,7 @@ class StaticRNN(object): else: return self.outputs - def complete_rnn_op(self): + def complete_op(self): main_program = self.helper.main_program rnn_block = main_program.current_block() parent_block = self.parent_block() @@ -397,9 +562,50 @@ class While(object): def lod_rank_table(x, level=0): - """ - This function creates an operator for creating a LOD_RANK_TABLE - using the input x. + """LoD Rank Table Operator. Given an input variable **x** and a level number + of LoD, this layer creates a LodRankTable object. A LoDRankTable object + contains a list of bi-element tuples. Each tuple consists of an index and + a length, both of which are int type. Reffering to specified level of LoD, + the index is the sequence index number and the length representes the + sequence length. Please note that the list is ranked in descending order by + the length. The following is an example: + + .. code-block:: text + + x is a LoDTensor: + x.lod = [[0, 2, 3], + [0, 5, 6, 7]] + x.data = [a, b, c, d, e, f, g] + + 1. set level to 0: + Create lod rank table: + lod_rank_table_obj = lod_rank_table(x, level=0) + + Get: + lod_rank_table_obj.items() = [(0, 2), (1, 1)] + + 2. set level to 1: + Create lod rank table: + lod_rank_table_obj = lod_rank_table(x, level=1) + + Get: + lod_rank_table_obj.items() = [(0, 5), (1, 1), (2, 1)] + + Args: + x (Variable): Input variable, a LoDTensor based which to create the lod + rank table. + level (int): Specify the LoD level, on which to create the lod rank + table. + + Returns: + Variable: The created LoDRankTable object. + + Examples: + .. code-block:: python + + x = fluid.layers.data(name='x', shape=[10], + dtype='float32', lod_level=1) + out = layers.lod_rank_table(x=x, level=0) """ helper = LayerHelper("lod_rank_table", **locals()) table = helper.create_variable( @@ -414,9 +620,25 @@ def lod_rank_table(x, level=0): def max_sequence_len(rank_table): - """ - This function creates an operator to calculate the length of - max seqence through input rank_table(should be a lod_rank_table) + """Max Sequence Len Operator. Given a LoDRankTable object, this layer + returns the max length of a batch of sequences. In fact, a LoDRankTable + object contains a list of tuples() and + the list is already sorted by sequence length in descending order, so the + operator just returns the sequence length of the first tuple element. + + Args: + rank_table (Variable): Input variable which is a LoDRankTable object. + + Returns: + Variable: The max length of sequence. + + Examples: + .. code-block:: python + + x = fluid.layers.data(name='x', shape=[10], + dtype='float32', lod_level=1) + rank_table = layers.lod_rank_table(x=x, level=0) + max_seq_len = layers.max_sequence_len(rank_table) """ helper = LayerHelper("max_seqence_len", **locals()) res = helper.create_tmp_variable(dtype="int64") @@ -428,6 +650,30 @@ def max_sequence_len(rank_table): def topk(input, k): + """ + **topk** + + This function performs the operation that selects the k entries in the input + vector and outputs their values and indices as vectors. Thus topk_out[j] is + the j-th largest entry in input, and its index is topk_indices[j] + + Args: + input (Variable|list): The input tensor that has all the data. + k (int): The number of top elements that the function will pick. + + Returns: + Variable: The variable of type array that contains the k largest entries + from input. + Variable: The variable of type array that contains the indices of k + largest entries from input. + + Examples: + .. code-block:: python + + x = fluid.layers.data(name='x', shape=[10]) + k = 5 + array = fluid.layers.topk(x, k) + """ helper = LayerHelper('topk', **locals()) topk_out = helper.create_tmp_variable(dtype=input.data_type) topk_indices = helper.create_tmp_variable(dtype='int64') @@ -752,7 +998,7 @@ class ConditionalBlock(object): out_list = [ parent_block.var(var_name) for var_name in parent_block.vars - if var_name not in intermediate + if var_name in intermediate ] step_scope = parent_block.create_var( diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index 2adce99d052639ec7d9063b1c234c623e7cdb9c6..7feb479d2e3bc396eac53045888175ba54864b66 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -13,8 +13,8 @@ __all__ = [ 'crf_decoding', 'cos_sim', 'cross_entropy', 'square_error_cost', 'accuracy', 'chunk_eval', 'sequence_conv', 'conv2d', 'sequence_pool', 'pool2d', 'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand', - 'lstm_unit', 'reduce_sum', 'reduce_mean', 'sequence_first_step', - 'sequence_last_step' + 'lstm_unit', 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min', + 'sequence_first_step', 'sequence_last_step' ] @@ -64,14 +64,14 @@ def fc(input, is flattened: the first `num_flatten_dims` dimensions will be flatten to form the first dimension of the final matrix (height of the - matrix), and the rest `rank(X) - num_col_dims` + matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to form the second dimension of the final matrix (width of the matrix). For example, suppose `X` is a 6-dimensional tensor with a shape [2, 3, 4, 5, 6], and - `x_num_col_dims` = 3. Then, the flattened matrix + `num_flatten_dims` = 3. Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. - By default, `x_num_col_dims` is set to 1. + By default, `num_flatten_dims` is set to 1. param_attr(ParamAttr|list): The parameter attribute for learnable parameters/weights of the fully connected layer. @@ -151,7 +151,7 @@ def embedding(input, size, is_sparse=False, param_attr=None, dtype='float32'): Args: input(Variable): Input to the function - size(tuple|list|None): Shape of the look up table parameter + size(tuple|list|None): Shape of the look up table parameter is_sparse(bool): Boolean flag that specifying whether the input is sparse param_attr(ParamAttr): Parameters for this layer dtype(np.dtype|core.DataType|str): The type of data : float32, float_16, int etc @@ -163,8 +163,9 @@ def embedding(input, size, is_sparse=False, param_attr=None, dtype='float32'): Examples: .. code-block:: python + dict_size = len(dataset.ids) data = fluid.layers.data(name='ids', shape=[32, 32], dtype='float32') - fc = fluid.layers.embedding(input=data, size=16) + fc = fluid.layers.embedding(input=data, size=[dict_size, 16]) """ helper = LayerHelper('embedding', **locals()) @@ -235,21 +236,50 @@ def gru_unit(input, activation='tanh', gate_activation='sigmoid'): """ - GRUUnit Operator implements partial calculations of the GRU unit as following: + GRU unit layer. The equation of a gru step is: - $$ - update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\ - reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\ - output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\ - output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t) - $$ + .. math:: + u_t & = actGate(xu_{t} + W_u h_{t-1} + b_u) + + r_t & = actGate(xr_{t} + W_r h_{t-1} + b_r) + + m_t & = actNode(xm_t + W_c dot(r_t, h_{t-1}) + b_m) + + h_t & = dot((1-u_t), m_t) + dot(u_t, h_{t-1}) + + The inputs of gru unit includes :math:`z_t`, :math:`h_{t-1}`. In terms + of the equation above, the :math:`z_t` is split into 3 parts - + :math:`xu_t`, :math:`xr_t` and :math:`xm_t`. This means that in order to + implement a full GRU unit operator for an input, a fully + connected layer has to be applied, such that :math:`z_t = W_{fc}x_t`. + + The terms :math:`u_t` and :math:`r_t` represent the update and reset gates + of the GRU cell. Unlike LSTM, GRU has one lesser gate. However, there is + an intermediate candidate hidden output, which is denoted by :math:`m_t`. + This layer has three outputs :math:`h_t`, :math:`dot(r_t, h_{t-1})` + and concatenation of :math:`u_t`, :math:`r_t` and :math:`m_t`. + + Args: + input (Variable): The fc transformed input value of current step. + hidden (Variable): The hidden value of lstm unit from previous step. + size (integer): The input dimension value. + weight (ParamAttr): The weight parameters for gru unit. Default: None + bias (ParamAttr): The bias parameters for gru unit. Default: None + activation (string): The activation type for cell (actNode). Default: 'tanh' + gate_activation (string): The activation type for gates (actGate). Default: 'sigmoid' + + Returns: + tuple: The hidden value, reset-hidden value and gate values. - which is same as one time step of GRU Operator. + Examples: + + .. code-block:: python - @note To implement the complete GRU unit, fully-connected operator must be - used before to feed xu, xr and xc as the Input of GRUUnit operator. + # assuming we have x_t_data and prev_hidden of size=10 + x_t = fluid.layers.fc(input=x_t_data, size=30) + hidden_val, r_h_val, gate_val = fluid.layers.gru_unit(input=x_t, + hidden = prev_hidden) - TODO(ChunweiYan) add more document here """ activation_dict = dict( identity=0, @@ -269,6 +299,7 @@ def gru_unit(input, attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype) # create bias + if bias is None: bias_size = [1, 3 * size] bias = helper.create_parameter( @@ -357,7 +388,59 @@ def cos_sim(X, Y, **kwargs): def cross_entropy(input, label, **kwargs): """ - This function computes cross_entropy using the input and label. + **Cross Entropy Layer** + + This layer computes the cross entropy between `input` and `label`. It supports + both standard cross-entropy and soft-label cross-entropy loss computation. + + 1) One-hot cross-entropy: + `soft_label = False`, `Label[i, 0]` indicates the class index for sample i: + + .. math:: + + Y[i] = -\log(X[i, Label[i]]) + + 2) Soft-label cross-entropy: + `soft_label = True`, `Label[i, j]` indicates the soft label of class j + for sample i: + + .. math:: + + Y[i] = \sum_j{-Label[i, j] * log(X[i, j])} + + Please make sure that in this case the summation of each row of `label` + equals one. + + 3) One-hot cross-entropy with vecterized `label`: + As a special case of 2), when each row of 'label' has only one + non-zero element which is equal to 1, soft-label cross-entropy degenerates + to a one-hot cross-entropy with one-hot label representation. + + Args: + input (Variable|list): a 2-D tensor with shape [N x D], where N is the + batch size and D is the number of classes. This input is a probability + computed by the previous operator, which is almost always the result + of a softmax operator. + label (Variable|list): the ground truth which is a 2-D tensor. When + `soft_label` is set to `False`, `label` is a tensor with shape + [N x 1]. When `soft_label` is set to `True`, `label` is a + tensor with shape [N x D]. + soft_label (bool, via `**kwargs`): a flag indicating whether to interpretate + the given labels as soft labels, default `False`. + + Returns: + A 2-D tensor with shape [N x 1], the cross entropy loss. + + Raises: + `ValueError`: 1) the 1st dimension of `input` and `label` are not equal; 2) when \ + `soft_label == True`, and the 2nd dimension of `input` and `label` are not \ + equal; 3) when `soft_label == False`, and the 2nd dimension of `label` is not 1. + + Examples: + .. code-block:: python + + predict = fluid.layers.fc(input=net, size=classdim, act='softmax') + cost = fluid.layers.cross_entropy(input=predict, label=label) """ helper = LayerHelper('cross_entropy', **kwargs) out = helper.create_tmp_variable(dtype=input.dtype) @@ -372,8 +455,36 @@ def cross_entropy(input, label, **kwargs): def square_error_cost(input, label, **kwargs): """ - This functions returns the squared error cost using the input and label. - The output is appending the op to do the above. + **Square error cost layer** + + This layer accepts input predictions and target label and returns the squared error cost. + For predictions, :math:`X`, and target labels, :math:`Y`, the equation is: + + .. math:: + + Out = (X - Y)^2 + + In the above equation: + + * :math:`X`: Input predictions, a tensor. + * :math:`Y`: Input labels, a tensor. + * :math:`Out`: Output value, same shape with :math:`X`. + + Args: + input(Variable): Input tensor, has predictions. + label(Variable): Label tensor, has target labels. + + Returns: + Variable: The tensor variable storing the element-wise squared error difference \ + of input and label. + + Examples: + .. code-block:: python + + y = layers.data(name='y', shape=[1], dtype='float32') + y_predict = layers.data(name='y_predict', shape=[1], dtype='float32') + cost = layers.square_error_cost(input=y_predict, label=y) + """ helper = LayerHelper('square_error_cost', **kwargs) minus_out = helper.create_tmp_variable(dtype=input.dtype) @@ -385,7 +496,8 @@ def square_error_cost(input, label, **kwargs): square_out = helper.create_tmp_variable(dtype=input.dtype) helper.append_op( - type='square', inputs={'X': [minus_out]}, outputs={'Y': [square_out]}) + type='square', inputs={'X': [minus_out]}, + outputs={'Out': [square_out]}) return square_out @@ -512,14 +624,83 @@ def conv2d(input, groups=None, param_attr=None, bias_attr=None, - act=None, - name=None): + act=None): """ - This function creates the op for a 2-dimensional Convolution. - This is performed using the parameters of filters(size, dimensionality etc) - , stride and other configurations for a Convolution operation. - This funciton can also append an activation on top of the - conv-2d output, if mentioned in the input parameters. + **Convlution2D Layer** + + The convolution2D layer calculates the output based on the input, filter + and strides, paddings, dilations, groups parameters. Input(Input) and Output(Output) + are in NCHW format. Where N is batch size, C is the number of channels, H is the height + of the feature, and W is the width of the feature. + The details of convolution layer, please refer UFLDL's `convolution, + `_ . + If bias attribution and activation type are provided, bias is added to the output of the convolution, + and the corresponding activation function is applied to the final result. + For each input :math:`X`, the equation is: + + + .. math:: + + Out = \sigma (W \\ast X + b) + + In the above equation: + + * :math:`X`: Input value, a tensor with NCHW format. + * :math:`W`: Filter value, a tensor with MCHW format. + * :math:`\\ast`: Convolution operation. + * :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. + * :math:`\\sigma`: Activation function. + * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. + + Example: + + Input: + Input shape: $(N, C_{in}, H_{in}, W_{in})$ + + Filter shape: $(C_{out}, C_{in}, H_f, W_f)$ + + Output: + Output shape: $(N, C_{out}, H_{out}, W_{out})$ + Where + .. math:: + + H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\ + W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1 + + Args: + input(Variable): The input image with [N, C, H, W] format. + num_filters(int): The number of filter. It is as same as the output + image channel. + filter_size(int|tuple|None): The filter size. If filter_size is a tuple, + it must contain two integers, (filter_size_H, filter_size_W). + Otherwise, the filter will be a square. + stride(int|tuple): The stride size. If stride is a tuple, it must + contain two integers, (stride_H, stride_W). Otherwise, the + stride_H = stride_W = stride. Default: stride = 1. + padding(int|tuple): The padding size. If padding is a tuple, it must + contain two integers, (padding_H, padding_W). Otherwise, the + padding_H = padding_W = padding. Default: padding = 0. + groups(int): The groups number of the Conv2d Layer. According to grouped + convolution in Alex Krizhevsky's Deep CNN paper: when group=2, + the first half of the filters is only connected to the first half + of the input channels, while the second half of the filters is only + connected to the second half of the input channels. Default: groups=1 + param_attr(ParamAttr): The parameters to the Conv2d Layer. Default: None + bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None + act(str): Activation type. Default: None + + Returns: + Variable: The tensor variable storing the convolution and \ + non-linearity activation result. + + Raises: + ValueError: If the shapes of input, filter_size, stride, padding and groups mismatch. + + Examples: + .. code-block:: python + + data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32') + conv2d = fluid.layers.conv2d(input=data, num_filters=2, filter_size=3, act="relu") """ if stride is None: @@ -575,9 +756,9 @@ def conv2d(input, def sequence_pool(input, pool_type, **kwargs): """ - This function add the operator for sequence pooling. - It pools features of all time-steps of each instance, and is applied - on top of the input using pool_type mentioned in the parameters. + This function add the operator for sequence pooling. + It pools features of all time-steps of each instance, and is applied + on top of the input using pool_type mentioned in the parameters. It supports four pool_type: @@ -603,10 +784,10 @@ def sequence_pool(input, pool_type, **kwargs): sqrt : out.data = [2.82, 6.93, 4.24], where 2.82=(1+3)/sqrt(2), 6.93=(2+4+6)/sqrt(3), 4.24=(5+1)/sqrt(2) max : out.data = [3, 6, 5], where 3=max(1,3), 6=max(2,4,6), 5=max(5,1) - + Args: input(variable): The input variable which is a LoDTensor. - pool_type (string): The pooling type of sequence_pool. + pool_type (string): The pooling type of sequence_pool. It supports average, sum, sqrt and max. Returns: @@ -615,8 +796,8 @@ def sequence_pool(input, pool_type, **kwargs): Examples: .. code-block:: python - - x = fluid.layers.data(name='x', shape=[7, 1], + + x = fluid.layers.data(name='x', shape=[7, 1], dtype='float32', lod_level=1) avg_x = fluid.layers.sequence_pool(input=x, pool_type='average') sum_x = fluid.layers.sequence_pool(input=x, pool_type='sum') @@ -653,7 +834,7 @@ def sequence_first_step(input, **kwargs): out.dim = [3, 1] with condition len(x.lod[-1]) - 1 == out.dims[0] out.data = [1, 2, 5], where 1=first(1,3), 2=first(2,4,6), 5=first(5,1) - + Args: input(variable): The input variable which is a LoDTensor. @@ -663,8 +844,8 @@ def sequence_first_step(input, **kwargs): Examples: .. code-block:: python - - x = fluid.layers.data(name='x', shape=[7, 1], + + x = fluid.layers.data(name='x', shape=[7, 1], dtype='float32', lod_level=1) x_first_step = fluid.layers.sequence_first_step(input=x) """ @@ -686,7 +867,7 @@ def sequence_last_step(input, **kwargs): out.dim = [3, 1] with condition len(x.lod[-1]) - 1 == out.dims[0] out.data = [3, 6, 1], where 3=last(1,3), 6=last(2,4,6), 1=last(5,1) - + Args: input(variable): The input variable which is a LoDTensor. @@ -696,8 +877,8 @@ def sequence_last_step(input, **kwargs): Examples: .. code-block:: python - - x = fluid.layers.data(name='x', shape=[7, 1], + + x = fluid.layers.data(name='x', shape=[7, 1], dtype='float32', lod_level=1) x_last_step = fluid.layers.sequence_last_step(input=x) """ @@ -1016,25 +1197,26 @@ def lstm_unit(x_t, .. math:: - i_t & = \sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i) + i_t & = \sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + b_i) - f_t & = \sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f) + f_t & = \sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + b_f) - c_t & = f_tc_{t-1} + i_t tanh (W_{x_c}x_t+W_{h_c}h_{t-1} + b_c) + c_t & = f_tc_{t-1} + i_t tanh (W_{x_c}x_t + W_{h_c}h_{t-1} + b_c) - o_t & = \sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o) + o_t & = \sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + b_o) h_t & = o_t tanh(c_t) - The inputs of lstm unit includes :math:`x_t`, :math:`h_{t-1}` and - :math:`c_{t-1}`. The implementation separates the linear transformation - and non-linear transformation apart. Here, we take :math:`i_t` as an - example. The linear transformation is applied by calling a `fc` layer and - the equation is: + The inputs of lstm unit include :math:`x_t`, :math:`h_{t-1}` and + :math:`c_{t-1}`. The 2nd dimensions of :math:`h_{t-1}` and :math:`c_{t-1}` + should be same. The implementation separates the linear transformation and + non-linear transformation apart. Here, we take :math:`i_t` as an example. + The linear transformation is applied by calling a `fc` layer and the + equation is: .. math:: - L_{i_t} = W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i + L_{i_t} = W_{x_i}x_{t} + W_{h_i}h_{t-1} + b_i The non-linear transformation is applied by calling `lstm_unit_op` and the equation is: @@ -1046,9 +1228,12 @@ def lstm_unit(x_t, This layer has two outputs including :math:`h_t` and :math:`o_t`. Args: - x_t (Variable): The input value of current step. - hidden_t_prev (Variable): The hidden value of lstm unit. - cell_t_prev (Variable): The cell value of lstm unit. + x_t (Variable): The input value of current step, a 2-D tensor with shape + M x N, M for batch size and N for input size. + hidden_t_prev (Variable): The hidden value of lstm unit, a 2-D tensor + with shape M x S, M for batch size and S for size of lstm unit. + cell_t_prev (Variable): The cell value of lstm unit, a 2-D tensor with + shape M x S, M for batch size and S for size of lstm unit. forget_bias (float): The forget bias of lstm unit. param_attr (ParamAttr): The attributes of parameter weights, used to set initializer, name etc. @@ -1061,14 +1246,15 @@ def lstm_unit(x_t, Raises: ValueError: The ranks of **x_t**, **hidden_t_prev** and **cell_t_prev**\ not be 2 or the 1st dimensions of **x_t**, **hidden_t_prev** \ - and **cell_t_prev** not be the same. + and **cell_t_prev** not be the same or the 2nd dimensions of \ + **hidden_t_prev** and **cell_t_prev** not be the same. Examples: .. code-block:: python x_t = fluid.layers.fc(input=x_t_data, size=10) - prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=20) + prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=30) prev_cell = fluid.layers.fc(input=prev_cell_data, size=30) hidden_value, cell_value = fluid.layers.lstm_unit(x_t=x_t, hidden_t_prev=prev_hidden, @@ -1087,7 +1273,11 @@ def lstm_unit(x_t, if x_t.shape[0] != hidden_t_prev.shape[0] or x_t.shape[ 0] != cell_t_prev.shape[0]: - raise ValueError("The 1s dimension of x_t, hidden_t_prev and " + raise ValueError("The 1st dimensions of x_t, hidden_t_prev and " + "cell_t_prev must be the same.") + + if hidden_t_prev.shape[1] != cell_t_prev.shape[1]: + raise ValueError("The 2nd dimensions of hidden_t_prev and " "cell_t_prev must be the same.") if bias_attr is None: @@ -1116,22 +1306,22 @@ def lstm_unit(x_t, def reduce_sum(input, dim=None, keep_dim=False): """ - Computes the sum of tensor elements over the given dimension. + Computes the sum of tensor elements over the given dimension. Args: input (Variable): The input variable which is a Tensor or LoDTensor. - dim (int|None): The dimension along which the sum is performed. If - :attr:`None`, sum all elements of :attr:`input` and return a - Tensor variable with a single element, otherwise must be in the - range :math:`[-rank(input), rank(input))`. If :math:`dim < 0`, + dim (int|None): The dimension along which the sum is performed. If + :attr:`None`, sum all elements of :attr:`input` and return a + Tensor variable with a single element, otherwise must be in the + range :math:`[-rank(input), rank(input))`. If :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`. - keep_dim (bool): Whether to reserve the reduced dimension in the - output Tensor. The result tensor will have one fewer dimension + keep_dim (bool): Whether to reserve the reduced dimension in the + output Tensor. The result tensor will have one fewer dimension than the :attr:`input` unless :attr:`keep_dim` is true. Returns: Variable: The reduced Tensor variable. - + Examples: .. code-block:: python @@ -1160,22 +1350,22 @@ def reduce_sum(input, dim=None, keep_dim=False): def reduce_mean(input, dim=None, keep_dim=False): """ - Computes the mean of tensor elements over the given dimension. + Computes the mean of tensor elements over the given dimension. Args: input (Variable): The input variable which is a Tensor or LoDTensor. - dim (int|None): The dimension along which the mean is computed. If - :attr:`None`, compute the mean over all elements of :attr:`input` - and return a Tensor variable with a single element, otherwise - must be in the range :math:`[-rank(input), rank(input))`. If + dim (int|None): The dimension along which the mean is computed. If + :attr:`None`, compute the mean over all elements of :attr:`input` + and return a Tensor variable with a single element, otherwise + must be in the range :math:`[-rank(input), rank(input))`. If :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`. - keep_dim (bool): Whether to reserve the reduced dimension in the - output Tensor. The result tensor will have one fewer dimension + keep_dim (bool): Whether to reserve the reduced dimension in the + output Tensor. The result tensor will have one fewer dimension than the :attr:`input` unless :attr:`keep_dim` is true. Returns: Variable: The reduced Tensor variable. - + Examples: .. code-block:: python @@ -1200,3 +1390,91 @@ def reduce_mean(input, dim=None, keep_dim=False): 'reduce_all': True if dim == None else False }) return out + + +def reduce_max(input, dim=None, keep_dim=False): + """ + Computes the maximum of tensor elements over the given dimension. + + Args: + input (Variable): The input variable which is a Tensor or LoDTensor. + dim (int|None): The dimension along which the maximum is computed. + If :attr:`None`, compute the maximum over all elements of + :attr:`input` and return a Tensor variable with a single element, + otherwise must be in the range :math:`[-rank(input), rank(input))`. + If :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`. + keep_dim (bool): Whether to reserve the reduced dimension in the + output Tensor. The result tensor will have one fewer dimension + than the :attr:`input` unless :attr:`keep_dim` is true. + + Returns: + Variable: The reduced Tensor variable. + + Examples: + .. code-block:: python + + # x is a Tensor variable with following elements: + # [[0.2, 0.3, 0.5, 0.9] + # [0.1, 0.2, 0.6, 0.7]] + # Each example is followed by the correspending output tensor. + fluid.layers.reduce_max(x) # [0.9] + fluid.layers.reduce_max(x, dim=0) # [0.2, 0.3, 0.6, 0.9] + fluid.layers.reduce_max(x, dim=-1) # [0.9, 0.7] + fluid.layers.reduce_max(x, dim=1, keep_dim=True) # [[0.9], [0.7]] + """ + helper = LayerHelper('reduce_max', **locals()) + out = helper.create_tmp_variable(dtype=helper.input_dtype()) + helper.append_op( + type='reduce_max', + inputs={'X': input}, + outputs={'Out': out}, + attrs={ + 'dim': dim if dim != None else 0, + 'keep_dim': keep_dim, + 'reduce_all': True if dim == None else False + }) + return out + + +def reduce_min(input, dim=None, keep_dim=False): + """ + Computes the minimum of tensor elements over the given dimension. + + Args: + input (Variable): The input variable which is a Tensor or LoDTensor. + dim (int|None): The dimension along which the minimum is computed. + If :attr:`None`, compute the minimum over all elements of + :attr:`input` and return a Tensor variable with a single element, + otherwise must be in the range :math:`[-rank(input), rank(input))`. + If :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`. + keep_dim (bool): Whether to reserve the reduced dimension in the + output Tensor. The result tensor will have one fewer dimension + than the :attr:`input` unless :attr:`keep_dim` is true. + + Returns: + Variable: The reduced Tensor variable. + + Examples: + .. code-block:: python + + # x is a Tensor variable with following elements: + # [[0.2, 0.3, 0.5, 0.9] + # [0.1, 0.2, 0.6, 0.7]] + # Each example is followed by the correspending output tensor. + fluid.layers.reduce_min(x) # [0.1] + fluid.layers.reduce_min(x, dim=0) # [0.1, 0.2, 0.5, 0.7] + fluid.layers.reduce_min(x, dim=-1) # [0.2, 0.1] + fluid.layers.reduce_min(x, dim=1, keep_dim=True) # [[0.2], [0.1]] + """ + helper = LayerHelper('reduce_min', **locals()) + out = helper.create_tmp_variable(dtype=helper.input_dtype()) + helper.append_op( + type='reduce_min', + inputs={'X': input}, + outputs={'Out': out}, + attrs={ + 'dim': dim if dim != None else 0, + 'keep_dim': keep_dim, + 'reduce_all': True if dim == None else False + }) + return out diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py index d2ff6841a317aaf6903edadc9213f69ef6c41216..23fe13f9bbf3e81802ac86415472e6aa603711b1 100644 --- a/python/paddle/v2/fluid/layers/ops.py +++ b/python/paddle/v2/fluid/layers/ops.py @@ -1,9 +1,24 @@ from ..registry import register_layer -__all__ = [ - 'mean', 'mul', 'dropout', 'reshape', 'sigmoid', 'scale', 'transpose', - 'sigmoid_cross_entropy_with_logits', 'elementwise_add', 'elementwise_div', - 'elementwise_sub', 'elementwise_mul', 'clip', 'abs', 'sequence_softmax' + +__activations__ = [ + 'abs', 'tanh', 'sigmoid', 'relu', 'sqrt', 'ceil', 'floor', 'log', 'round' ] +__all__ = [ + 'mean', + 'mul', + 'dropout', + 'reshape', + 'scale', + 'transpose', + 'sigmoid_cross_entropy_with_logits', + 'elementwise_add', + 'elementwise_div', + 'elementwise_sub', + 'elementwise_mul', + 'clip', + 'sequence_softmax', +] + __activations__ + for _OP in set(__all__): globals()[_OP] = register_layer(_OP) diff --git a/python/paddle/v2/fluid/layers/tensor.py b/python/paddle/v2/fluid/layers/tensor.py index e5820d24cd2b34ef53cbb91e2be66efc1b74d315..9ce25a9e0831a49ef3bbc5026181856e6c4cdfcc 100644 --- a/python/paddle/v2/fluid/layers/tensor.py +++ b/python/paddle/v2/fluid/layers/tensor.py @@ -201,15 +201,47 @@ def fill_constant_batch_size_like(input, def ones(shape, dtype): """ - This function performs the same function as fill_constant() declared above - with the constant value being 1.0. + **ones** + + This function creates a tensor of specified *shape* and + *dtype*, and initializes this with 1. + + It also sets *stop_gradient* to True. + + Args: + shape(tuple|list|None): Shape of output tensor + dtype(np.dtype|core.DataType|str): Data type of output tensor + + Returns: + Variable: The tensor variable storing the output + + Examples: + .. code-block:: python + + data = fluid.layers.ones(shape=[1], dtype='int64') """ return fill_constant(value=1.0, **locals()) def zeros(shape, dtype): """ - This function performs the same function as fill_constant() declared above - with the constant value being 0.0. + **zeros** + + This function creates a tensor of specified *shape* and + *dtype*, and initializes this with 0. + + It also sets *stop_gradient* to True. + + Args: + shape(tuple|list|None): Shape of output tensor + dtype(np.dtype|core.DataType|str): Data type of output tensor + + Returns: + Variable: The tensor variable storing the output + + Examples: + .. code-block:: python + + data = fluid.layers.zeros(shape=[1], dtype='int64') """ return fill_constant(value=0.0, **locals()) diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py index c56a531ed531cf0219e94854ba66c7399e003292..ff3e5315a2c2b115e4ba563f60de4139f248e93a 100644 --- a/python/paddle/v2/fluid/optimizer.py +++ b/python/paddle/v2/fluid/optimizer.py @@ -1,7 +1,7 @@ from collections import defaultdict import framework -from backward import append_backward_ops +from backward import append_backward from framework import unique_name, program_guard from initializer import Constant from layer_helper import LayerHelper @@ -194,10 +194,10 @@ class Optimizer(object): no_grad_set=None): """Add operations to minimize `loss` by updating `parameter_list`. - This method combines interface `append_backward_ops()` and + This method combines interface `append_backward()` and `create_optimization_pass()` into one. """ - params_grads = append_backward_ops(loss, parameter_list, no_grad_set) + params_grads = append_backward(loss, parameter_list, no_grad_set) params_grads = append_gradient_clip_ops(params_grads) diff --git a/python/paddle/v2/fluid/profiler.py b/python/paddle/v2/fluid/profiler.py index 2069b713faf41c5c00ceaf47e030864b98c678da..dcecd76224e70d03ed987a5bb104a977a527d218 100644 --- a/python/paddle/v2/fluid/profiler.py +++ b/python/paddle/v2/fluid/profiler.py @@ -1,5 +1,6 @@ import paddle.v2.fluid.core as core from contextlib import contextmanager +import os __all__ = ['CudaProfiler'] @@ -30,17 +31,21 @@ def cuda_profiler(output_file, output_mode=None, config=None): written into this file. output_mode (string) : The output mode has Key-Value pair format and Comma separated values format. It should be 'kvp' or 'csv'. - config (string) : The profiler options and counters can refer to - "Compute Command Line Profiler User Guide". + config (list of string) : The profiler options and counters can refer + to "Compute Command Line Profiler User Guide". """ if output_mode is None: output_mode = 'csv' if output_mode not in ['kvp', 'csv']: raise ValueError("The output mode must be 'kvp' or 'csv'.") config = NVPROF_CONFIG if config is None else config - core.nvprof_init(output_file, output_mode, config) + config_file = 'nvprof_config_file' + with open(config_file, 'wb') as fp: + fp.writelines(["%s\n" % item for item in config]) + core.nvprof_init(output_file, output_mode, config_file) # Enables profiler collection by the active CUDA profiling tool. core.nvprof_start() yield # Disables profiler collection. core.nvprof_stop() + os.remove(config_file) diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py index c3591a613acafb268a5bd70618cd4555450bac29..8acd470c5ed5fa8eeda396f1e9182db4ecdd7016 100644 --- a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py @@ -170,7 +170,7 @@ def main(): exe.run(fluid.default_startup_program()) - embedding_param = fluid.g_scope.find_var(embedding_name).get_tensor() + embedding_param = fluid.global_scope().find_var(embedding_name).get_tensor() embedding_param.set( load_parameter(conll05.get_embedding(), word_dict_len, word_dim), place) diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py index fc073f6be8563a363c0f98b9235ae267fa68562d..51bfe2973db7bd2ec4b43bb588be4c1fcfb11e74 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py @@ -14,6 +14,7 @@ hidden1 = fluid.layers.fc(input=image, param_attr=fluid.ParamAttr( regularizer=regularizer, clip=fluid.clip.ClipByValue(10))) + hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu', @@ -73,5 +74,9 @@ for pass_id in range(PASS_NUM): + " test_acc=" + str(test_pass_acc)) if test_pass_acc > 0.7: + fluid.io.save_inference_model( + "./recognize_digits_mlp.inference.model/", ["x"], [predict], + exe) exit(0) + exit(1) diff --git a/python/paddle/v2/fluid/tests/book/test_recommender_system.py b/python/paddle/v2/fluid/tests/book/test_recommender_system.py index db91ca4f9c7d17fb51fc5d65a0464e976d98523c..e3cc2a89371233014dec4ba3d730a866722d3eae 100644 --- a/python/paddle/v2/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/v2/fluid/tests/book/test_recommender_system.py @@ -125,10 +125,11 @@ def model(): # need cos sim inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features) + scale_infer = layers.scale(x=inference, scale=5.0) label = layers.data(name='score', shape=[1], dtype='float32') - square_cost = layers.square_error_cost(input=inference, label=label) + square_cost = layers.square_error_cost(input=scale_infer, label=label) avg_cost = layers.mean(x=square_cost) @@ -141,7 +142,7 @@ def main(): opts = sgd_optimizer.minimize(cost) if USE_GPU: - place = core.GPUPlace(0) + place = core.CUDAPlace(0) else: place = core.CPUPlace() diff --git a/python/paddle/v2/fluid/tests/book/notest_recognize_digits_conv_dist.py b/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py similarity index 76% rename from python/paddle/v2/fluid/tests/book/notest_recognize_digits_conv_dist.py rename to python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py index 2680502efb91061be37a77fbe5b451960fdd15f7..20b4a8b34cd085ae51e6169f0d4eac58b7f3ffb2 100644 --- a/python/paddle/v2/fluid/tests/book/notest_recognize_digits_conv_dist.py +++ b/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py @@ -38,35 +38,43 @@ train_reader = paddle.batch( place = fluid.CPUPlace() exe = fluid.Executor(place) + t = fluid.DistributeTranspiler() +# all parameter server endpoints list for spliting parameters pserver_endpoints = os.getenv("PSERVERS") +# server endpoint for current node +current_endpoint = os.getenv("SERVER_ENDPOINT") +# run as trainer or parameter server training_role = os.getenv("TRAINING_ROLE", "TRAINER") # get the training role: trainer/pserver -t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=1) +t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2) if training_role == "PSERVER": - pserver_prog = t.get_pserver_program(pserver_endpoints, optimize_ops) + if not current_endpoint: + print("need env SERVER_ENDPOINT") + exit(1) + pserver_prog = t.get_pserver_program(current_endpoint, optimize_ops) exe.run(fluid.default_startup_program()) exe.run(pserver_prog) elif training_role == "TRAINER": + trainer_prog = t.get_trainer_program() feeder = fluid.DataFeeder(feed_list=[images, label], place=place) exe.run(fluid.default_startup_program()) for pass_id in range(PASS_NUM): accuracy.reset(exe) + batch_id = 0 for data in train_reader(): - loss, acc = exe.run(fluid.default_main_program(), + loss, acc = exe.run(trainer_prog, feed=feeder.feed(data), fetch_list=[avg_cost] + accuracy.metrics) pass_acc = accuracy.eval(exe) - # print loss, acc - if loss < 10.0 and pass_acc > 0.9: - # if avg cost less than 10.0 and accuracy is larger than 0.9, we think our code is good. - exit(0) + if batch_id % 100 == 0: + print("batch_id %d, loss: %f, acc: %f" % + (batch_id, loss, pass_acc)) + batch_id += 1 pass_acc = accuracy.eval(exe) print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc)) else: print("environment var TRAINER_ROLE should be TRAINER os PSERVER") - -exit(1) diff --git a/python/paddle/v2/fluid/tests/decorators.py b/python/paddle/v2/fluid/tests/decorators.py new file mode 100644 index 0000000000000000000000000000000000000000..154619b0e93455922700a12d734967b4d20c4f13 --- /dev/null +++ b/python/paddle/v2/fluid/tests/decorators.py @@ -0,0 +1,29 @@ +import paddle.v2.fluid as fluid + +__all__ = ['many_times', 'prog_scope'] + + +def many_times(times): + def __impl__(fn): + def __fn__(*args, **kwargs): + for _ in range(times): + fn(*args, **kwargs) + + return __fn__ + + return __impl__ + + +def prog_scope(): + def __impl__(fn): + def __fn__(*args, **kwargs): + prog = fluid.Program() + startup_prog = fluid.Program() + scope = fluid.core.Scope() + with fluid.scope_guard(scope): + with fluid.program_guard(prog, startup_prog): + fn(*args, **kwargs) + + return __fn__ + + return __impl__ diff --git a/python/paddle/v2/fluid/tests/op_test.py b/python/paddle/v2/fluid/tests/op_test.py index e83c4a0622013cbfebdf39434ef252412697acb1..b77d2b1268f27c5ec3c34839aaad9b75f0132c2e 100644 --- a/python/paddle/v2/fluid/tests/op_test.py +++ b/python/paddle/v2/fluid/tests/op_test.py @@ -4,7 +4,7 @@ import random import itertools import paddle.v2.fluid.core as core import collections -from paddle.v2.fluid.backward import append_backward_ops +from paddle.v2.fluid.backward import append_backward from paddle.v2.fluid.op import Operator from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.framework import Program, OpProtoHolder @@ -90,12 +90,10 @@ def get_numeric_gradient(scope, def product(dim): return reduce(lambda a, b: a * b, dim, 1) - ctx = core.DeviceContext.create(core.CPUPlace()) - def get_output(): sum = [] for output_name in output_names: - op.run(scope, ctx) + op.run(scope, core.CPUPlace()) sum.append( np.array(scope.find_var(output_name).get_tensor()).mean()) return np.array(sum).mean() @@ -318,7 +316,7 @@ class OpTest(unittest.TestCase): def check_output(self, atol=1e-5): places = [core.CPUPlace()] if core.is_compile_gpu() and core.op_support_gpu(self.op_type): - places.append(core.GPUPlace(0)) + places.append(core.CUDAPlace(0)) for place in places: self.check_output_with_place(place, atol) @@ -381,7 +379,7 @@ class OpTest(unittest.TestCase): "Gradient Check On %s" % str(cpu_place)) if core.is_compile_gpu() and self.op.support_gpu(): - gpu_place = core.GPUPlace(0) + gpu_place = core.CUDAPlace(0) gpu_analytic_grads = self._get_gradient(inputs_to_check, gpu_place, output_names, no_grad_set) @@ -493,7 +491,7 @@ class OpTest(unittest.TestCase): op_loss.desc.infer_var_type(block.desc) op_loss.desc.infer_shape(block.desc) - param_grad_list = append_backward_ops( + param_grad_list = append_backward( loss=loss, parameter_list=input_to_check, no_grad_set=no_grad_set) feed_dict = { diff --git a/python/paddle/v2/fluid/tests/test_activation_op.py b/python/paddle/v2/fluid/tests/test_activation_op.py index b052374dc7ec3c5684d6adfda6b9d000c5e19fe0..03eb7deb9a35933e5a1676a262a371c69151e6d1 100644 --- a/python/paddle/v2/fluid/tests/test_activation_op.py +++ b/python/paddle/v2/fluid/tests/test_activation_op.py @@ -10,13 +10,13 @@ class TestExp(OpTest): self.inputs = { 'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32") } - self.outputs = {'Y': np.exp(self.inputs['X'])} + self.outputs = {'Out': np.exp(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestSigmoid(OpTest): @@ -25,13 +25,13 @@ class TestSigmoid(OpTest): self.inputs = { 'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32") } - self.outputs = {'Y': 1 / (1 + np.exp(-self.inputs['X']))} + self.outputs = {'Out': 1 / (1 + np.exp(-self.inputs['X']))} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.008) + self.check_grad(['X'], 'Out', max_relative_error=0.008) class TestLogSigmoid(OpTest): @@ -40,13 +40,13 @@ class TestLogSigmoid(OpTest): self.inputs = { 'X': np.random.uniform(-1, 1, [11, 17]).astype("float32") } - self.outputs = {'Y': np.log(1 / (1 + np.exp(-self.inputs['X'])))} + self.outputs = {'Out': np.log(1 / (1 + np.exp(-self.inputs['X'])))} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.008) + self.check_grad(['X'], 'Out', max_relative_error=0.008) class TestTanh(OpTest): @@ -55,13 +55,13 @@ class TestTanh(OpTest): self.inputs = { 'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32") } - self.outputs = {'Y': np.tanh(self.inputs['X'])} + self.outputs = {'Out': np.tanh(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestTanhShrink(OpTest): @@ -70,13 +70,13 @@ class TestTanhShrink(OpTest): self.inputs = { 'X': np.random.uniform(0.1, 1, [10, 17]).astype("float32") } - self.outputs = {'Y': self.inputs['X'] - np.tanh(self.inputs['X'])} + self.outputs = {'Out': self.inputs['X'] - np.tanh(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.008) + self.check_grad(['X'], 'Out', max_relative_error=0.008) class TestHardShrink(OpTest): @@ -90,13 +90,13 @@ class TestHardShrink(OpTest): t = np.copy(x) t[(t >= -threshold) & (t <= threshold)] = 0 - self.outputs = {'Y': t} + self.outputs = {'Out': t} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.005) + self.check_grad(['X'], 'Out', max_relative_error=0.005) class TestSoftShrink(OpTest): @@ -110,13 +110,13 @@ class TestSoftShrink(OpTest): y = np.copy(self.inputs['X']) y = (y < -lambda_val) * (y + lambda_val) + (y > lambda_val) * ( y - lambda_val) - self.outputs = {'Y': y} + self.outputs = {'Out': y} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestSqrt(OpTest): @@ -125,13 +125,13 @@ class TestSqrt(OpTest): self.inputs = { 'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32") } - self.outputs = {'Y': np.sqrt(self.inputs['X'])} + self.outputs = {'Out': np.sqrt(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestAbs(OpTest): @@ -144,13 +144,13 @@ class TestAbs(OpTest): # we should avoid this x[np.abs(x) < 0.005] = 0.02 self.inputs = {'X': x} - self.outputs = {'Y': np.abs(self.inputs['X'])} + self.outputs = {'Out': np.abs(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestCeil(OpTest): @@ -158,13 +158,13 @@ class TestCeil(OpTest): self.op_type = "ceil" x = np.random.uniform(-1, 1, [4, 4]).astype("float32") self.inputs = {'X': x} - self.outputs = {'Y': np.ceil(self.inputs['X'])} + self.outputs = {'Out': np.ceil(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestFloor(OpTest): @@ -173,13 +173,13 @@ class TestFloor(OpTest): x = np.random.uniform(-1, 1, [4, 4]).astype("float32") self.inputs = {'X': x} # numpy floor need +1 - self.outputs = {'Y': np.floor(self.inputs['X']) + 1.0} + self.outputs = {'Out': np.floor(self.inputs['X']) + 1.0} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestRound(OpTest): @@ -187,13 +187,13 @@ class TestRound(OpTest): self.op_type = "round" x = np.random.uniform(-1, 1, [4, 4]).astype("float32") self.inputs = {'X': x} - self.outputs = {'Y': np.round(self.inputs['X'])} + self.outputs = {'Out': np.round(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestRelu(OpTest): @@ -203,13 +203,13 @@ class TestRelu(OpTest): # The same reason with TestAbs x[np.abs(x) < 0.005] = 0.02 self.inputs = {'X': x} - self.outputs = {'Y': np.maximum(self.inputs['X'], 0)} + self.outputs = {'Out': np.maximum(self.inputs['X'], 0)} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestBRelu(OpTest): @@ -227,13 +227,13 @@ class TestBRelu(OpTest): t = np.copy(x) t[t < t_min] = t_min t[t > t_max] = t_max - self.outputs = {'Y': t} + self.outputs = {'Out': t} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.02) + self.check_grad(['X'], 'Out', max_relative_error=0.02) class TestRelu6(OpTest): @@ -248,14 +248,14 @@ class TestRelu6(OpTest): self.inputs = {'X': x} self.attrs = {'threshold': threshold} self.outputs = { - 'Y': np.minimum(np.maximum(self.inputs['X'], 0), threshold) + 'Out': np.minimum(np.maximum(self.inputs['X'], 0), threshold) } def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.02) + self.check_grad(['X'], 'Out', max_relative_error=0.02) class TestSoftRelu(OpTest): @@ -271,13 +271,13 @@ class TestSoftRelu(OpTest): t = np.copy(x) t[t < -threshold] = -threshold t[t > threshold] = threshold - self.outputs = {'Y': np.log((np.exp(t) + 1))} + self.outputs = {'Out': np.log((np.exp(t) + 1))} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.02) + self.check_grad(['X'], 'Out', max_relative_error=0.02) class TestELU(OpTest): @@ -290,27 +290,27 @@ class TestELU(OpTest): self.inputs = {'X': x} self.attrs = {'alpha': alpha} self.outputs = { - 'Y': np.maximum(0, x) + np.minimum(0, alpha * (np.exp(x) - 1)) + 'Out': np.maximum(0, x) + np.minimum(0, alpha * (np.exp(x) - 1)) } def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.02) + self.check_grad(['X'], 'Out', max_relative_error=0.02) class TestReciprocal(OpTest): def setUp(self): self.op_type = "reciprocal" self.inputs = {'X': np.random.uniform(1, 2, [11, 17]).astype("float32")} - self.outputs = {'Y': np.reciprocal(self.inputs['X'])} + self.outputs = {'Out': np.reciprocal(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.01) + self.check_grad(['X'], 'Out', max_relative_error=0.01) class TestLog(OpTest): @@ -319,13 +319,13 @@ class TestLog(OpTest): self.inputs = { 'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32") } - self.outputs = {'Y': np.log(self.inputs['X'])} + self.outputs = {'Out': np.log(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestSquare(OpTest): @@ -334,13 +334,13 @@ class TestSquare(OpTest): self.inputs = { 'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32") } - self.outputs = {'Y': np.square(self.inputs['X'])} + self.outputs = {'Out': np.square(self.inputs['X'])} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestPow(OpTest): @@ -348,13 +348,13 @@ class TestPow(OpTest): self.op_type = "pow" self.inputs = {'X': np.random.uniform(1, 2, [11, 17]).astype("float32")} self.attrs = {'factor': 3.0} - self.outputs = {'Y': np.power(self.inputs['X'], 3)} + self.outputs = {'Out': np.power(self.inputs['X'], 3)} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.02) + self.check_grad(['X'], 'Out', max_relative_error=0.02) class TestSTanh(OpTest): @@ -366,13 +366,13 @@ class TestSTanh(OpTest): scale_a = 2.0 / 3.0 scale_b = 1.7159 self.attrs = {'scale_a': scale_a, 'scale_b': scale_b} - self.outputs = {'Y': scale_b * np.tanh(self.inputs['X'] * scale_a)} + self.outputs = {'Out': scale_b * np.tanh(self.inputs['X'] * scale_a)} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestSoftplus(OpTest): @@ -381,13 +381,13 @@ class TestSoftplus(OpTest): self.inputs = { 'X': np.random.uniform(-1, 1, [11, 17]).astype("float64") } - self.outputs = {'Y': np.log(1 + np.exp(self.inputs['X']))} + self.outputs = {'Out': np.log(1 + np.exp(self.inputs['X']))} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestSoftsign(OpTest): @@ -397,14 +397,14 @@ class TestSoftsign(OpTest): 'X': np.random.uniform(-1, 1, [11, 17]).astype("float32") } self.outputs = { - 'Y': np.divide(self.inputs['X'], 1 + np.abs(self.inputs['X'])) + 'Out': np.divide(self.inputs['X'], 1 + np.abs(self.inputs['X'])) } def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.007) + self.check_grad(['X'], 'Out', max_relative_error=0.007) class TestThresholdedRelu(OpTest): @@ -419,13 +419,13 @@ class TestThresholdedRelu(OpTest): self.inputs = {'X': X} self.attrs = {'threshold': threshold} - self.outputs = {'Y': (X > threshold) * X} + self.outputs = {'Out': (X > threshold) * X} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=self.relative_error) + self.check_grad(['X'], 'Out', max_relative_error=self.relative_error) class TestHardSigmoid(OpTest): @@ -447,13 +447,13 @@ class TestHardSigmoid(OpTest): upper_threshold - 0.2 temp = X * slope + offset - self.outputs = {'Y': np.maximum(0.0, np.minimum(1.0, temp))} + self.outputs = {'Out': np.maximum(0.0, np.minimum(1.0, temp))} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.002) + self.check_grad(['X'], 'Out', max_relative_error=0.002) class TestSwish(OpTest): @@ -462,13 +462,13 @@ class TestSwish(OpTest): X = np.random.uniform(0.1, 1, [11, 17]).astype("float32") self.inputs = {'X': X} self.attrs = {'beta': 2.3} - self.outputs = {'Y': X * expit(self.attrs['beta'] * X)} + self.outputs = {'Out': X * expit(self.attrs['beta'] * X)} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y', max_relative_error=0.008) + self.check_grad(['X'], 'Out', max_relative_error=0.008) if __name__ == "__main__": diff --git a/python/paddle/v2/fluid/tests/test_adagrad_op.py b/python/paddle/v2/fluid/tests/test_adagrad_op.py index 903e84c32887100bbeef6ebf81f66f06f084fab5..7b2d02fbf4256d2c27383a3452d526271af543a3 100644 --- a/python/paddle/v2/fluid/tests/test_adagrad_op.py +++ b/python/paddle/v2/fluid/tests/test_adagrad_op.py @@ -113,8 +113,7 @@ class TestSparseAdagradOp(unittest.TestCase): LearningRate='LearningRate', epsilon=2.0) - ctx = core.DeviceContext.create(place) - adagrad_op.run(scope, ctx) + adagrad_op.run(scope, place) # get and compare moment result moment_result_array = np.array(moment) @@ -168,7 +167,7 @@ class TestSparseAdagradOp(unittest.TestCase): def test_sparse_adagrad(self): places = [core.CPUPlace()] if core.is_compile_gpu(): - places.append(core.GPUPlace(0)) + places.append(core.CUDAPlace(0)) for place in places: self.check_with_place(place) diff --git a/python/paddle/v2/fluid/tests/test_adam_op.py b/python/paddle/v2/fluid/tests/test_adam_op.py index a0d6655d4cbcff8ed3d55df0f4e68fc6591fbb11..7dbc2fa0858a68c5da9e8d48dcb187494357e940 100644 --- a/python/paddle/v2/fluid/tests/test_adam_op.py +++ b/python/paddle/v2/fluid/tests/test_adam_op.py @@ -1,6 +1,8 @@ import unittest import numpy as np from op_test import OpTest +from paddle.v2.fluid import core +from paddle.v2.fluid.op import Operator class TestAdamOp1(OpTest): @@ -176,5 +178,124 @@ def adam_step(inputs, attributes): return param_out, moment1_out, moment2_out +def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad): + ''' + Simulate one step of the adam optimizer + :param inputs: dict of inputs + :param attributes: dict of attributes + :return tuple: tuple of output param, moment1, moment2, + beta1 power accumulator and beta2 power accumulator + ''' + param = inputs['Param'] + # grad = inputs['Grad'] + moment1 = inputs['Moment1'] + moment2 = inputs['Moment2'] + lr = inputs['LearningRate'] + beta1_pow = inputs['Beta1Pow'] + beta2_pow = inputs['Beta2Pow'] + + beta1 = attributes['beta1'] + beta2 = attributes['beta2'] + epsilon = attributes['epsilon'] + + moment1_out = np.zeros(shape=[height, row_numel]) + moment2_out = np.zeros(shape=[height, row_numel]) + param_out = np.zeros(shape=[height, row_numel]) + + for idx, row_id in enumerate(rows): + moment1_out[row_id] = beta1 * moment1[row_id] + (1 - beta1 + ) * np_grad[idx] + moment2_out[row_id] = beta2 * moment2[row_id] + ( + 1 - beta2) * np.square(np_grad[idx]) + lr_t = lr * np.sqrt(1 - beta2_pow) / (1 - beta1_pow) + param_out[row_id] = param[row_id] - lr_t * (moment1_out[row_id] / ( + np.sqrt(moment2_out[row_id]) + epsilon)) + return param_out, moment1_out, moment2_out + + +class TestSparseAdamOp(unittest.TestCase): + def setup(self, scope, place): + beta1 = 0.78 + beta2 = 0.836 + epsilon = 1e-4 + + height = 10 + rows = [0, 4, 7] + self.rows = rows + row_numel = 12 + self.row_numel = row_numel + self.dense_inputs = { + "Param": np.full((height, row_numel), 5.0).astype("float32"), + "Moment1": np.full((height, row_numel), 5.0).astype("float32"), + "Moment2": np.full((height, row_numel), 5.0).astype("float32"), + 'Beta1Pow': np.array([beta1**10]).astype("float32"), + 'Beta2Pow': np.array([beta2**10]).astype("float32"), + "LearningRate": np.full((1), 2.0).astype("float32") + } + self.attrs = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2} + + grad_selected_rows = scope.var('Grad').get_selected_rows() + grad_selected_rows.set_height(height) + grad_selected_rows.set_rows(rows) + np_array = np.ones((len(rows), row_numel)).astype("float32") + np_array[0, 0] = 2.0 + np_array[2, 8] = 4.0 + + grad_tensor = grad_selected_rows.get_tensor() + grad_tensor.set(np_array, place) + + self.sparse_inputs = ["Grad"] + + param_out, mom1, mom2 = adam_step_sparse( + self.dense_inputs, self.attrs, height, rows, row_numel, np_array) + self.outputs = { + "ParamOut": param_out, + "Moment1Out": mom1, + "Moment2Out": mom2 + } + + def check_with_place(self, place): + scope = core.Scope() + self.setup(scope, place) + + op_args = dict() + for key, np_array in self.dense_inputs.iteritems(): + var = scope.var(key).get_tensor() + var.set(np_array, place) + op_args[key] = key + for s in self.sparse_inputs: + op_args[s] = s + for s in self.outputs: + var = scope.var(s).get_tensor() + var.set(self.outputs[s], place) + op_args[s] = s + for k in self.attrs: + op_args[k] = self.attrs[k] + + # create and run sgd operator + adam_op = Operator("adam", **op_args) + adam_op.run(scope, place) + + for key, np_array in self.outputs.iteritems(): + out_var = scope.var(key).get_tensor() + actual = np.array(out_var) + actual = actual.reshape([actual.size]) + np_array = np_array.reshape([np_array.size]) + for idx, row_id in enumerate(self.rows): + j = 0 + while j < self.row_numel: + pos = row_id * self.row_numel + j + self.assertLess((actual[pos] - np_array[pos]) / actual[pos], + 0.00001) + j += 1 + + def test_sparse_sgd(self): + places = [core.CPUPlace()] + if core.is_compile_gpu(): + places.append(core.CUDAPlace(0)) + for place in places: + self.check_with_place(place) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_array_read_write_op.py b/python/paddle/v2/fluid/tests/test_array_read_write_op.py index f6120aedecf1015c279b8f218f5e37f2e598ab91..01321de8eac34d562d99726b1f4125d1932ab40f 100644 --- a/python/paddle/v2/fluid/tests/test_array_read_write_op.py +++ b/python/paddle/v2/fluid/tests/test_array_read_write_op.py @@ -2,7 +2,7 @@ import unittest import paddle.v2.fluid.core as core import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.backward import append_backward_ops +from paddle.v2.fluid.backward import append_backward from paddle.v2.fluid.framework import default_main_program import numpy @@ -64,7 +64,7 @@ class TestArrayReadWrite(unittest.TestCase): total_sum = layers.sums(input=[a_sum, x_sum]) total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0) - append_backward_ops(total_sum_scaled) + append_backward(total_sum_scaled) g_vars = map(default_main_program().global_block().var, [each_x.name + "@GRAD" for each_x in x]) diff --git a/python/paddle/v2/fluid/tests/test_batch_norm_op.py b/python/paddle/v2/fluid/tests/test_batch_norm_op.py index a9c0b1cfd3417d0583fa9d4e15550e7543a6bd19..abbd48d2b843cedb77caffc13413d2f9695defa6 100644 --- a/python/paddle/v2/fluid/tests/test_batch_norm_op.py +++ b/python/paddle/v2/fluid/tests/test_batch_norm_op.py @@ -296,8 +296,7 @@ class TestBatchNormOp(OpTest): momentum=momentum, epsilon=epsilon) - ctx = core.DeviceContext.create(place) - batch_norm_op.run(scope, ctx) + batch_norm_op.run(scope, place) # check forward result self.__assert_close(y_tensor, y_out, "y_out") @@ -305,7 +304,7 @@ class TestBatchNormOp(OpTest): self.__assert_close(saved_variance_tensor, saved_variance, "saved_variance") self.__assert_close(mean_out_tensor, mean_out, "mean_out") - if isinstance(place, core.GPUPlace): + if isinstance(place, core.CUDAPlace): atol = 5e-2 else: atol = 1e-4 @@ -320,7 +319,7 @@ class TestBatchNormOp(OpTest): ["y_out", "mean", "variance", "saved_mean", "saved_variance"], place, feed_dict={"y_out": y_grad}) - batch_norm_op_grad.run(scope, ctx) + batch_norm_op_grad.run(scope, place) x_grad_tensor = create_or_get_tensor(scope, grad_var_name("x_val"), None, @@ -340,7 +339,7 @@ class TestBatchNormOp(OpTest): places = [core.CPUPlace()] if core.is_compile_gpu() and core.op_support_gpu("batch_norm"): - places.append(core.GPUPlace(0)) + places.append(core.CUDAPlace(0)) core.init_devices(["CPU", "GPU:0"]) else: diff --git a/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py b/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py index 5fad7d8cce5af3677aa77dc0abb64f1ecd380419..f329214dce407fe0382c51b29f0f4c33b562541a 100644 --- a/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py +++ b/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py @@ -57,8 +57,7 @@ class TestBeamSearchDecodeOp(unittest.TestCase): SentenceIds="sentence_ids", SentenceScores="sentence_scores") - ctx = core.DeviceContext.create(self.cpu_place) - beam_search_decode_op.run(self.scope, ctx) + beam_search_decode_op.run(self.scope, self.cpu_place) expected_lod = [[0, 4, 8], [0, 1, 3, 6, 9, 10, 13, 16, 19]] self.assertEqual(sentence_ids.lod(), expected_lod) diff --git a/python/paddle/v2/fluid/tests/test_beam_search_op.py b/python/paddle/v2/fluid/tests/test_beam_search_op.py index cc7c09bb59de3f83e47b4d95c1203f7f050c5132..595f132fa85f0a65f15d9ac31ad320e567c96358 100644 --- a/python/paddle/v2/fluid/tests/test_beam_search_op.py +++ b/python/paddle/v2/fluid/tests/test_beam_search_op.py @@ -14,7 +14,6 @@ def create_tensor(scope, name, np_data): class BeamSearchOpTester(unittest.TestCase): def setUp(self): self.scope = core.Scope() - self.ctx = core.DeviceContext.create(core.CPUPlace()) self._create_ids() self._create_scores() self._create_pre_ids() @@ -32,7 +31,7 @@ class BeamSearchOpTester(unittest.TestCase): level=0, beam_size=2, end_id=0, ) - op.run(self.scope, self.ctx) + op.run(self.scope, core.CPUPlace()) selected_ids = self.scope.find_var("selected_ids").get_tensor() print 'selected_ids', np.array(selected_ids) print 'lod', selected_ids.lod() diff --git a/python/paddle/v2/fluid/tests/test_cond_op.py b/python/paddle/v2/fluid/tests/test_cond_op.py index 9d1df44b9065f8101e90b87815660f8c0818645f..32e54084e48cf77c569db4dee54a0c89d5108373 100644 --- a/python/paddle/v2/fluid/tests/test_cond_op.py +++ b/python/paddle/v2/fluid/tests/test_cond_op.py @@ -65,8 +65,7 @@ class TestCondOp(unittest.TestCase): self.create_global_variables() self.create_cond_op() self.create_sub_net() - ctx = core.DeviceContext.create(core.CPUPlace()) - self.condop.run(self.scope, ctx) + self.condop.run(self.scope, core.CPUPlace()) return np.array(self.scope.find_var("Out").get_tensor()) def create_global_variables(self): diff --git a/python/paddle/v2/fluid/tests/test_conditional_block.py b/python/paddle/v2/fluid/tests/test_conditional_block.py index 2b9d8f351a2836cd723d629d4790de1e068d0ea3..7d815123f3454d1457f59202219f9a93bf3d8c31 100644 --- a/python/paddle/v2/fluid/tests/test_conditional_block.py +++ b/python/paddle/v2/fluid/tests/test_conditional_block.py @@ -3,7 +3,7 @@ import paddle.v2.fluid.layers as layers import paddle.v2.fluid.core as core from paddle.v2.fluid.framework import default_startup_program, default_main_program from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.backward import append_backward_ops +from paddle.v2.fluid.backward import append_backward import numpy @@ -26,7 +26,7 @@ class ConditionalBlock(unittest.TestCase): outs = exe.run(feed={'X': x}, fetch_list=[out])[0] print outs loss = layers.mean(x=out) - append_backward_ops(loss=loss) + append_backward(loss=loss) outs = exe.run( feed={'X': x}, fetch_list=[ diff --git a/python/paddle/v2/fluid/tests/test_conv2d_op.py b/python/paddle/v2/fluid/tests/test_conv2d_op.py index e82e3ab0c9c0bc75a13a8948fda925bc4f0b6512..958300e655e012b91598360105ca2734c3bd2c37 100644 --- a/python/paddle/v2/fluid/tests/test_conv2d_op.py +++ b/python/paddle/v2/fluid/tests/test_conv2d_op.py @@ -1,5 +1,7 @@ import unittest import numpy as np + +import paddle.v2.fluid.core as core from op_test import OpTest @@ -47,6 +49,7 @@ def conv2d_forward_naive(input, filter, group, conv_param): class TestConv2dOp(OpTest): def setUp(self): + core.use_cuda() self.init_op_type() self.init_group() self.init_dilation() @@ -167,26 +170,31 @@ class TestWithDilation(TestConv2dOp): #----------------Conv2dCudnn---------------- class TestCudnn(TestConv2dOp): def init_op_type(self): + core.use_cudnn() self.op_type = "conv2d_cudnn" class TestCudnnWithPad(TestWithPad): def init_op_type(self): + core.use_cudnn() self.op_type = "conv2d_cudnn" class TestCudnnWithStride(TestWithStride): def init_op_type(self): + core.use_cudnn() self.op_type = "conv2d_cudnn" class TestCudnnWithGroup(TestWithGroup): def init_op_type(self): + core.use_cudnn() self.op_type = "conv2d_cudnn" class TestCudnnWith1x1(TestWith1x1): def init_op_type(self): + core.use_cudnn() self.op_type = "conv2d_cudnn" diff --git a/python/paddle/v2/fluid/tests/test_detection_output_op.py b/python/paddle/v2/fluid/tests/test_detection_output_op.py new file mode 100644 index 0000000000000000000000000000000000000000..080a9743b0182cb7e6dd0030fc306a7f82510a05 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_detection_output_op.py @@ -0,0 +1,57 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestUnpoolOp(OpTest): + def setUp(self): + self.op_type = "detection_output" + self.init_test_case() + + #loc.shape ((1, 4, 4, 1, 1)) + #conf.shape ((1, 4, 2, 1, 1)) + + loc = np.array([[[[[0.1]], [[0.1]], [[0.1]], [[0.1]]], + [[[0.1]], [[0.1]], [[0.1]], [[0.1]]], + [[[0.1]], [[0.1]], [[0.1]], [[0.1]]], + [[[0.1]], [[0.1]], [[0.1]], [[0.1]]]]]) + conf = np.array([[[[[0.1]], [[0.9]]], [[[0.2]], [[0.8]]], + [[[0.3]], [[0.7]]], [[[0.4]], [[0.6]]]]]) + priorbox = np.array([ + 0.1, 0.1, 0.5, 0.5, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.6, 0.6, 0.1, + 0.1, 0.2, 0.2, 0.3, 0.3, 0.7, 0.7, 0.1, 0.1, 0.2, 0.2, 0.4, 0.4, + 0.8, 0.8, 0.1, 0.1, 0.2, 0.2 + ]) + + output = np.array([ + 0, 1, 0.68997443, 0.099959746, 0.099959746, 0.50804031, 0.50804031 + ]) + self.inputs = { + 'Loc': loc.astype('float32'), + 'Conf': conf.astype('float32'), + 'PriorBox': priorbox.astype('float32') + } + self.attrs = { + 'num_classes': self.num_classes, + 'top_k': self.top_k, + 'nms_top_k': self.nms_top_k, + 'background_label_id': self.background_label_id, + 'nms_threshold': self.nms_threshold, + 'confidence_threshold': self.confidence_threshold, + } + self.outputs = {'Out': output.astype('float32')} + + def test_check_output(self): + self.check_output() + + def init_test_case(self): + self.num_classes = 2 + self.top_k = 10 + self.nms_top_k = 20 + self.background_label_id = 0 + self.nms_threshold = 0.01 + self.confidence_threshold = 0.01 + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py new file mode 100644 index 0000000000000000000000000000000000000000..c02c59284e1ca2e28ba2f6c5ec13b241c15fc288 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py @@ -0,0 +1,347 @@ +import numpy +import random +import collections +import paddle.v2.fluid as fluid +import unittest +from decorators import * + + +class Memory(object): + def __init__(self, shape, dtype='float32'): + self.ex = numpy.zeros(shape=shape, dtype=dtype) + self.cur = None + + def update(self, val): + assert val.shape == self.ex.shape + assert val.dtype == self.ex.dtype + self.cur = val + + def ex(self): + return self.ex + + def next(self): + self.ex = self.cur + self.cur = None + + def __next__(self): + self.next() + + def reset(self): + self.ex = numpy.zeros(shape=self.ex.shape, dtype=self.ex.dtype) + self.cur = None + + +class Output(object): + def __init__(self): + self.outs = [] + + def next_sequence(self): + self.outs.append([]) + + def out(self, val): + self.outs[-1].append(val) + + def last(self): + return self.outs[-1][-1] + + +class BaseRNN(object): + def __init__(self, ins, mems, params, outs, num_seq=5, max_seq_len=15): + self.num_seq = num_seq + self.inputs = collections.defaultdict(list) + + for _ in xrange(num_seq): + seq_len = random.randint(1, max_seq_len - 1) + for iname in ins: + ishape = ins[iname].get('shape', None) + idtype = ins[iname].get('dtype', 'float32') + lst = [] + for _ in xrange(seq_len): + lst.append(numpy.random.random(size=ishape).astype(idtype)) + self.inputs[iname].append(lst) + + self.mems = dict() + for mname in mems: + mshape = mems[mname].get('shape', None) + mdtype = mems[mname].get('dtype', 'float32') + self.mems[mname] = Memory(shape=mshape, dtype=mdtype) + + self.params = dict() + for pname in params: + pshape = params[pname].get('shape', None) + pdtype = params[pname].get('dtype', 'float32') + self.params[pname] = numpy.random.random(size=pshape).astype(pdtype) + + self.outputs = dict() + + for oname in outs: + self.outputs[oname] = Output() + + def step(self, **kwargs): + raise NotImplementedError() + + def exe(self): + retv = dict() + for out in self.outputs: + retv[out] = [] + + for seq_id in xrange(self.num_seq): + for mname in self.mems: + self.mems[mname].reset() + for out in self.outputs: + self.outputs[out].next_sequence() + + iname0 = self.inputs.keys()[0] + seq_len = len(self.inputs[iname0][seq_id]) + + for step_id in xrange(seq_len): + xargs = dict() + + for iname in self.inputs: + xargs[iname] = self.inputs[iname][seq_id][step_id] + + for mname in self.mems: + xargs[mname] = self.mems[mname] + + for pname in self.params: + xargs[pname] = self.params[pname] + + for out in self.outputs: + xargs[out] = self.outputs[out] + + self.step(**xargs) + + for mname in self.mems: + next(self.mems[mname]) + + for out in self.outputs: + retv[out].append(self.outputs[out].last()) + + for out in retv: + retv[out] = numpy.array(retv[out]) + return retv + + def to_feed(self, place): + feed_dict = dict() + + for iname in self.inputs: + lod = [0] + np_flatten = [] + for seq_id in xrange(len(self.inputs[iname])): + seq_len = len(self.inputs[iname][seq_id]) + lod.append(lod[-1] + seq_len) + np_flatten.extend(self.inputs[iname][seq_id]) + + t = fluid.Tensor() + t.set(numpy.array(np_flatten), place) + t.set_lod([lod]) + feed_dict[iname] = t + + for pname in self.params: + feed_dict[pname] = self.params[pname] + return feed_dict + + def get_numeric_gradient_of_param(self, param_name, delta=0.001): + p = self.params[param_name] + if len(p.shape) != 2: + raise ValueError("Not support get numeric gradient of an parameter," + " which is not matrix") + g = numpy.zeros(shape=p.shape, dtype=p.dtype) + + for i in xrange(p.shape[0]): + for j in xrange(p.shape[1]): + o = p[i][j] + p[i][j] += delta + pos = self._exe_mean_out_() + p[i][j] -= 2 * delta + neg = self._exe_mean_out_() + p[i][j] = o + g[i][j] = (pos - neg) / (delta * 2) + return g + + def get_numeric_gradient_of_input(self, + input_name, + delta=0.001, + return_one_tensor=True): + ipt = self.inputs[input_name] + grad = [] + + for seq in ipt: + seq_grad = [] + for item in seq: + item_grad = numpy.zeros(shape=item.shape, dtype=item.dtype) + if len(item.shape) != 1: + raise ValueError("Not support") + + for i in xrange(len(item)): + o = item[i] + item[i] += delta + pos = self._exe_mean_out_() + item[i] -= 2 * delta + neg = self._exe_mean_out_() + item[i] = o + item_grad[i] = (pos - neg) / (delta * 2) + seq_grad.append(item_grad) + grad.append(seq_grad) + + if not return_one_tensor: + return grad + + for i in xrange(len(grad)): + grad[i] = numpy.concatenate(grad[i]) + grad = numpy.concatenate(grad) + return grad + + def _exe_mean_out_(self): + outs = self.exe() + return numpy.array([o.mean() for o in outs.itervalues()]).mean() + + +class TestSimpleMul(unittest.TestCase): + DATA_NAME = 'X' + DATA_WIDTH = 32 + PARAM_NAME = 'W' + HIDDEN_WIDTH = 10 + OUT_NAME = 'Out' + + class SimpleMul(BaseRNN): + def __init__(self): + base = TestSimpleMul + super(base.SimpleMul, self).__init__({ + base.DATA_NAME: { + 'shape': [base.DATA_WIDTH] + } + }, {}, { + base.PARAM_NAME: { + 'shape': [base.DATA_WIDTH, base.HIDDEN_WIDTH] + } + }, [base.OUT_NAME]) + + def step(self, X, W, Out): + Out.out(numpy.matmul(X, W)) + + # Test many times in local to ensure the random seed cannot breaks CI + # @many_times(10) + @prog_scope() + def test_forward_backward(self): + py_rnn = TestSimpleMul.SimpleMul() + dat = fluid.layers.data( + name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1) + dat.stop_gradient = False + + rnn = fluid.layers.DynamicRNN() + with rnn.block(): + d = rnn.step_input(dat) + o = fluid.layers.fc(input=d, + param_attr=self.PARAM_NAME, + bias_attr=False, + size=self.HIDDEN_WIDTH, + act=None) + rnn.output(o) + + out = rnn() + out = fluid.layers.sequence_pool(out, pool_type='last') + loss = fluid.layers.mean(x=out) + fluid.backward.append_backward(loss) + + cpu = fluid.CPUPlace() + exe = fluid.Executor(cpu) + out, w_g, i_g = map(numpy.array, + exe.run(feed=py_rnn.to_feed(cpu), + fetch_list=[ + out, self.PARAM_NAME + "@GRAD", + self.DATA_NAME + "@GRAD" + ], + return_numpy=False)) + out_by_python = py_rnn.exe()[self.OUT_NAME] + self.assertTrue(numpy.allclose(out, out_by_python)) + w_g_num = py_rnn.get_numeric_gradient_of_param(self.PARAM_NAME) + self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.05)) + i_g_num = py_rnn.get_numeric_gradient_of_input( + input_name=self.DATA_NAME) + i_g_num = i_g_num.reshape(i_g.shape) + self.assertTrue(numpy.allclose(i_g_num, i_g, rtol=0.05)) + + +class TestSimpleMulWithMemory(unittest.TestCase): + DATA_WIDTH = 32 + HIDDEN_WIDTH = 20 + DATA_NAME = 'X' + PARAM_NAME = 'W' + + class SimpleMulWithMemory(BaseRNN): + def __init__(self): + super(TestSimpleMulWithMemory.SimpleMulWithMemory, self).__init__({ + TestSimpleMulWithMemory.DATA_NAME: { + 'shape': [TestSimpleMulWithMemory.DATA_WIDTH] + } + }, {'Mem': { + 'shape': [TestSimpleMulWithMemory.HIDDEN_WIDTH] + }}, { + TestSimpleMulWithMemory.PARAM_NAME: { + 'shape': [ + TestSimpleMulWithMemory.DATA_WIDTH, + TestSimpleMulWithMemory.HIDDEN_WIDTH + ] + } + }, ['Out']) + + def step(self, X, Mem, W, Out): + o = numpy.matmul(X, W) + assert isinstance(Mem, Memory) + o += Mem.ex + Mem.update(o) + assert isinstance(Out, Output) + Out.out(o) + + # many_times used locally for debug. Make sure the calculation is stable. + # @many_times(10) + @prog_scope() + def test_forward_backward(self): + py_rnn = TestSimpleMulWithMemory.SimpleMulWithMemory() + data = fluid.layers.data( + name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1) + data.stop_gradient = False + rnn = fluid.layers.DynamicRNN() + with rnn.block(): + d = rnn.step_input(data) + mem = rnn.memory(value=0.0, shape=[self.HIDDEN_WIDTH]) + hidden = fluid.layers.fc(input=d, + size=self.HIDDEN_WIDTH, + param_attr=self.PARAM_NAME, + bias_attr=False, + act=None) + o = fluid.layers.elementwise_add(x=hidden, y=mem) + rnn.update_memory(mem, o) + rnn.output(o) + + out = rnn() + last = fluid.layers.sequence_pool(input=out, pool_type='last') + loss = fluid.layers.mean(x=last) + fluid.backward.append_backward(loss) + + cpu = fluid.CPUPlace() + exe = fluid.Executor(cpu) + feed = py_rnn.to_feed(cpu) + last_np, w_g, i_g = map(numpy.array, + exe.run(feed=feed, + fetch_list=[ + last, self.PARAM_NAME + "@GRAD", + self.DATA_NAME + "@GRAD" + ], + return_numpy=False)) + last_by_py, = py_rnn.exe().values() + w_g_num = py_rnn.get_numeric_gradient_of_param(self.PARAM_NAME) + self.assertTrue(numpy.allclose(last_np, last_by_py)) + + self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.1)) + i_g_num = py_rnn.get_numeric_gradient_of_input(self.DATA_NAME) + i_g_num = i_g_num.reshape(i_g.shape) + + # Since this RNN has many float add. The number could be not stable. + # rtol = 0.1 + self.assertTrue(numpy.allclose(i_g_num, i_g, rtol=0.1)) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_gaussian_random_op.py b/python/paddle/v2/fluid/tests/test_gaussian_random_op.py index a9d943b8b7f7d9bc0dec89c5360769e0328527ba..6f6a60ccb3ff17f6a12eec6974b8b2d73885c29f 100644 --- a/python/paddle/v2/fluid/tests/test_gaussian_random_op.py +++ b/python/paddle/v2/fluid/tests/test_gaussian_random_op.py @@ -20,11 +20,10 @@ class TestGaussianRandomOp(unittest.TestCase): def test_gpu(self): if core.is_compile_gpu(): - self.gaussian_random_test(place=fluid.GPUPlace(0)) + self.gaussian_random_test(place=fluid.CUDAPlace(0)) def gaussian_random_test(self, place): - context = core.DeviceContext.create(place) program = fluid.Program() block = program.global_block() vout = block.create_var(name="Out") diff --git a/python/paddle/v2/fluid/tests/test_is_empty_op.py b/python/paddle/v2/fluid/tests/test_is_empty_op.py index ed6e3fe24f6333c9c90d760787eb13241a7e1868..0a4dd0f4faf370161e5695d97f0ed4bf73b6ec26 100644 --- a/python/paddle/v2/fluid/tests/test_is_empty_op.py +++ b/python/paddle/v2/fluid/tests/test_is_empty_op.py @@ -33,8 +33,7 @@ class TestIsEmptyOp(unittest.TestCase): def one_case(self, input, target): op = Operator(type="is_empty", X=input, Out="out") - ctx = core.DeviceContext.create(core.CPUPlace()) - op.run(self.scope, ctx) + op.run(self.scope, core.CPUPlace()) out = self.scope.var("out").get_tensor() self.assertEqual(np.array(out)[0], target) diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py index c851f37b23f056e06c0b4c6835820cc1f15605b0..69cb23524fb0b68f9973810a11515452407802b8 100644 --- a/python/paddle/v2/fluid/tests/test_layers.py +++ b/python/paddle/v2/fluid/tests/test_layers.py @@ -177,8 +177,8 @@ class TestBook(unittest.TestCase): name='x_t_data', shape=[10, 10], dtype='float32') x_t = layers.fc(input=x_t_data, size=10) prev_hidden_data = layers.data( - name='prev_hidden_data', shape=[10, 20], dtype='float32') - prev_hidden = layers.fc(input=prev_hidden_data, size=20) + name='prev_hidden_data', shape=[10, 30], dtype='float32') + prev_hidden = layers.fc(input=prev_hidden_data, size=30) prev_cell_data = layers.data( name='prev_cell', shape=[10, 30], dtype='float32') prev_cell = layers.fc(input=prev_cell_data, size=30) diff --git a/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py b/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py index 5fdabbcf889448114ac4e55e7944cb6c57ba5f3c..c552cb033f1ec8f5843490083edee7b2762b5703 100644 --- a/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py +++ b/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py @@ -4,7 +4,7 @@ import numpy import paddle.v2.fluid.layers as layers from paddle.v2.fluid.framework import Program, program_guard from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.backward import append_backward_ops +from paddle.v2.fluid.backward import append_backward class TestCPULoDTensorArrayOps(unittest.TestCase): @@ -170,7 +170,7 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase): mean = layers.mean(x=result) - append_backward_ops(mean) + append_backward(mean) tensor = core.LoDTensor() tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place) diff --git a/python/paddle/v2/fluid/tests/test_net.py b/python/paddle/v2/fluid/tests/test_net.py index 318df08a9e73ac95cab73c34182bc6220ef6c681..d9fe55a8af5c750c5c926e875ddbb645f8abb1a0 100644 --- a/python/paddle/v2/fluid/tests/test_net.py +++ b/python/paddle/v2/fluid/tests/test_net.py @@ -7,7 +7,7 @@ def fc(X, W, Y): ret_v = core.Net.create() ret_v.append_op(Operator("mul", X="X", Y="W", Out="pre_activation")) - ret_v.append_op(Operator("sigmoid", X="pre_activation", Y=Y)) + ret_v.append_op(Operator("sigmoid", X="pre_activation", Out=Y)) ret_v.complete_add_op(True) return ret_v @@ -30,7 +30,7 @@ Op(plain_net), inputs:{all[W, X, Y]}, outputs:{all[Out, fc.out, pre_activation]} Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}. Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}. Op(mul), inputs:{X[X], Y[W]}, outputs:{Out[pre_activation]}. - Op(sigmoid), inputs:{X[pre_activation]}, outputs:{Y[fc.out]}. + Op(sigmoid), inputs:{X[pre_activation]}, outputs:{Out[fc.out]}. ''' self.assertEqual(expected, "\n" + str(net)) diff --git a/python/paddle/v2/fluid/tests/test_norm_op.py b/python/paddle/v2/fluid/tests/test_norm_op.py new file mode 100644 index 0000000000000000000000000000000000000000..7d56320489b24c5547e045cb51b778851ff94a32 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_norm_op.py @@ -0,0 +1,57 @@ +import unittest +import numpy as np +from op_test import OpTest + + +def norm(input, scale, epsilon): + s0, s1, s2, s3 = input.shape + x_square = input * input + for i in xrange(s0): + input_batch = input[i:i + 1, :, :, :] + input_batch = input_batch.reshape(s1, s2 * s3) + x_square_batch = x_square[i:i + 1, :, :, :] + x_square_batch = x_square_batch.reshape(s1, s2 * s3) + square_colsum = x_square_batch.sum(axis=0) + epsilon + tmp = pow(square_colsum, 0.5) + tmp = np.reciprocal(tmp) + tmp_tile = np.tile(tmp, s1) + tmp_tile = tmp_tile.reshape(s1, s2 * s3) + scale_tile = np.tile(scale, (1, s2 * s3)) + scale_tile = scale_tile.reshape(s1, s2 * s3) + out_batch = input_batch * tmp_tile * scale_tile + out_batch = out_batch.reshape(1, s1, s2, s3) + if i == 0: + out = out_batch + else: + out = np.concatenate((out, out_batch), 0) + out.reshape(s0, s1, s2, s3) + return out + + +class TestNormOp(OpTest): + def setUp(self): + self.op_type = "norm" + self.init_test_case() + input = np.random.random(self.shape).astype("float32") + scale = np.array([10, 10, 10]) + self.inputs = { + 'X': input.astype('float32'), + 'Scale': scale.astype('float32') + } + self.attrs = {'epsilon': self.epsilon} + output = norm(input, scale, self.epsilon) + self.outputs = {'Out': output.astype('float32')} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + def init_test_case(self): + self.shape = [2, 3, 2, 2] + self.epsilon = 1e-6 + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_optimizer.py b/python/paddle/v2/fluid/tests/test_optimizer.py index 29694be58bce0eb41b05439da35ef07a542ef12a..1eadb7d912629024ee21e30b0a5fa4910bb96e06 100644 --- a/python/paddle/v2/fluid/tests/test_optimizer.py +++ b/python/paddle/v2/fluid/tests/test_optimizer.py @@ -2,7 +2,7 @@ import unittest import paddle.v2.fluid.framework as framework import paddle.v2.fluid.optimizer as optimizer -from paddle.v2.fluid.backward import append_backward_ops +from paddle.v2.fluid.backward import append_backward class TestOptimizer(unittest.TestCase): @@ -102,7 +102,7 @@ class TestMomentumOptimizer(unittest.TestCase): dtype="float32", shape=[1], lod_level=0, name="mean.out") block.append_op( type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) - params_grads = append_backward_ops(mean_out) + params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) opts = momentum_optimizer.create_optimization_pass( @@ -151,7 +151,7 @@ class TestMomentumOptimizer(unittest.TestCase): learning_rate = 0.01 momentum_optimizer = self.MockMomentum( learning_rate=learning_rate, momentum=0.2, use_nesterov=True) - params_grads = append_backward_ops(mean_out) + params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) opts = momentum_optimizer.create_optimization_pass( @@ -209,7 +209,7 @@ class TestAdagradOptimizer(unittest.TestCase): learning_rate = 0.01 adagrad_optimizer = self.MockAdagrad( learning_rate=learning_rate, epsilon=1.0e-6) - params_grads = append_backward_ops(mean_out) + params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0) opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out, @@ -269,7 +269,7 @@ class TestAdamOptimizer(unittest.TestCase): learning_rate = 0.01 adam_optimizer = self.MockAdam( learning_rate=learning_rate, beta1=0.9, beta2=0.999) - params_grads = append_backward_ops(mean_out) + params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adam_optimizer.get_accumulators()), 0) opts = adam_optimizer.create_optimization_pass(params_grads, mul_out, @@ -331,7 +331,7 @@ class TestAdamaxOptimizer(unittest.TestCase): learning_rate = 0.01 adamax_optimizer = self.MockAdamax( learning_rate=learning_rate, beta1=0.9, beta2=0.999) - params_grads = append_backward_ops(mean_out) + params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adamax_optimizer.get_accumulators()), 0) opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out, @@ -390,7 +390,7 @@ class TestDecayedAdagradOptimizer(unittest.TestCase): learning_rate = 0.01 decayed_adagrad_optimizer = self.MockDecayedAdagrad( learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6) - params_grads = append_backward_ops(mean_out) + params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0) opts = decayed_adagrad_optimizer.create_optimization_pass( diff --git a/python/paddle/v2/fluid/tests/test_parallel_op.py b/python/paddle/v2/fluid/tests/test_parallel_op.py new file mode 100644 index 0000000000000000000000000000000000000000..2788f4e519b31b45250fbb923b2309e8bb1f6fa1 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_parallel_op.py @@ -0,0 +1,46 @@ +import unittest + +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid as fluid +from paddle.v2.fluid.framework import Program +from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.backward import append_backward +import numpy as np +import paddle.v2.fluid.core as core + + +class ParallelOpTest(unittest.TestCase): + def setUp(self): + x = layers.data( + shape=[-1, 30, 40], + dtype='float32', + name='x', + append_batch_size=False, + stop_gradient=False) + + places = fluid.default_main_program().global_block().create_var() + pd = layers.ParallelDo(places=places) + + with pd.do(): + data = pd.read_input(x) + hidden = layers.fc(input=data, size=7) + pd.write_output(hidden) + data = pd() + loss = layers.mean(x=data) + sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) + sgd_optimizer.minimize(loss) + + exe = fluid.Executor(fluid.CPUPlace()) + exe.run(fluid.default_startup_program()) + exe.run(fluid.default_main_program(), + feed={ + x.name: np.random.uniform(0.1, 0.6, + (20, 30, 40)).astype("float32") + }) + + def test_forward(self): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_profiler.py b/python/paddle/v2/fluid/tests/test_profiler.py index 395d0dc36a3d1d6fbfebb4cdf34395c4edee412d..e3f3ac58ef9b30864849770510f7339749dab84f 100644 --- a/python/paddle/v2/fluid/tests/test_profiler.py +++ b/python/paddle/v2/fluid/tests/test_profiler.py @@ -3,6 +3,7 @@ import numpy as np import paddle.v2.fluid as fluid import paddle.v2.fluid.profiler as profiler import paddle.v2.fluid.layers as layers +import os class TestProfiler(unittest.TestCase): @@ -14,14 +15,16 @@ class TestProfiler(unittest.TestCase): data = layers.data(name='data', shape=[3, 28, 28], dtype='float32') conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1]) - place = fluid.GPUPlace(0) + place = fluid.CUDAPlace(0) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof: + output_file = 'cuda_profiler.txt' + with profiler.cuda_profiler(output_file, 'csv') as nvprof: for i in range(epoc): input = np.random.random(dshape).astype('float32') exe.run(fluid.default_main_program(), feed={'data': input}) + os.remove(output_file) if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/test_recurrent_op.py b/python/paddle/v2/fluid/tests/test_recurrent_op.py index e38c763ddbcc5c8410f41d062c05499333a3ee55..84f4e36fa7312fbcb96cc66ff26e234c3016df30 100644 --- a/python/paddle/v2/fluid/tests/test_recurrent_op.py +++ b/python/paddle/v2/fluid/tests/test_recurrent_op.py @@ -3,7 +3,7 @@ import unittest import paddle.v2.fluid.layers as layers from paddle.v2.fluid.framework import Program, grad_var_name from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.backward import append_backward_ops +from paddle.v2.fluid.backward import append_backward import numpy as np import paddle.v2.fluid.core as core @@ -177,7 +177,7 @@ class RecurrentOpTest1(unittest.TestCase): def test_backward(self): self.check_forward() - append_backward_ops(self.output) + append_backward(self.output) ana_grad = [np.array(x) for x in self.backward()] diff --git a/python/paddle/v2/fluid/tests/test_regularizer.py b/python/paddle/v2/fluid/tests/test_regularizer.py index 24baf55e90c98f39bab926e8c85a791eee5ed4a4..890c881a126a32344128652691c6cad45e02e82d 100644 --- a/python/paddle/v2/fluid/tests/test_regularizer.py +++ b/python/paddle/v2/fluid/tests/test_regularizer.py @@ -3,7 +3,7 @@ import unittest import paddle.v2.fluid.framework as framework import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.regularizer as regularizer -from paddle.v2.fluid.backward import append_backward_ops +from paddle.v2.fluid.backward import append_backward class TestL2DecayRegularizer(unittest.TestCase): @@ -33,7 +33,7 @@ class TestL2DecayRegularizer(unittest.TestCase): dtype="float32", shape=[1], lod_level=0, name="mean.out") block.append_op( type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) - params_grads = append_backward_ops(mean_out) + params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) count_ops = len(block.ops) params_grads = optimizer.append_regularization_ops(params_grads) @@ -70,7 +70,7 @@ class TestL1DecayRegularizer(unittest.TestCase): dtype="float32", shape=[1], lod_level=0, name="mean.out") block.append_op( type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) - params_grads = append_backward_ops(mean_out) + params_grads = append_backward(mean_out) self.assertEqual(len(params_grads), 1) count_ops = len(block.ops) params_grads = optimizer.append_regularization_ops(params_grads) diff --git a/python/paddle/v2/fluid/tests/test_reorder_lod_tensor.py b/python/paddle/v2/fluid/tests/test_reorder_lod_tensor.py index 8f5774835e02191a068e86ea56f3f877c464a391..7c136f6360ce73a7c532b5486e544796e6853bcb 100644 --- a/python/paddle/v2/fluid/tests/test_reorder_lod_tensor.py +++ b/python/paddle/v2/fluid/tests/test_reorder_lod_tensor.py @@ -12,7 +12,7 @@ class TestReorderLoDTensor(unittest.TestCase): new_dat = fluid.layers.reorder_lod_tensor_by_rank( x=dat, rank_table=table) loss = fluid.layers.mean(x=new_dat) - fluid.backward.append_backward_ops(loss=loss) + fluid.backward.append_backward(loss=loss) cpu = fluid.CPUPlace() exe = fluid.Executor(cpu) diff --git a/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py b/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py index 9999165ed509aa40f31f26aa676f381561bd0016..d1bb20f37a3785f70bee072b9df282bba4012c16 100644 --- a/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py +++ b/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py @@ -2,7 +2,7 @@ import unittest from paddle.v2.fluid.framework import Program from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.backward import append_backward_ops +from paddle.v2.fluid.backward import append_backward import numpy as np import paddle.v2.fluid.core as core diff --git a/python/paddle/v2/fluid/tests/test_sgd_op.py b/python/paddle/v2/fluid/tests/test_sgd_op.py index ca05a381f06cfd40b7939dbda8d4f1f4aacd0271..14d41e172a22c677235ab3fa997ef6f0b6e39778 100644 --- a/python/paddle/v2/fluid/tests/test_sgd_op.py +++ b/python/paddle/v2/fluid/tests/test_sgd_op.py @@ -55,8 +55,7 @@ class TestSparseSGDOp(unittest.TestCase): Grad='Grad', ParamOut='Param', LearningRate='LearningRate') - ctx = core.DeviceContext.create(place) - sgd_op.run(scope, ctx) + sgd_op.run(scope, place) # get and compare result result_array = np.array(param) @@ -79,7 +78,7 @@ class TestSparseSGDOp(unittest.TestCase): def test_sparse_sgd(self): places = [core.CPUPlace()] if core.is_compile_gpu(): - places.append(core.GPUPlace(0)) + places.append(core.CUDAPlace(0)) for place in places: self.check_with_place(place) diff --git a/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py index 86db4c64b493d94cc675ed4bcee7e2925fef1977..be1588fc2d09fa58882425eb3d080ef1560ebc79 100644 --- a/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py +++ b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py @@ -2,7 +2,7 @@ import unittest import paddle.v2.fluid.core as core from paddle.v2.fluid.executor import Executor import paddle.v2.fluid.layers as layers -from paddle.v2.fluid.backward import append_backward_ops +from paddle.v2.fluid.backward import append_backward from paddle.v2.fluid.framework import default_main_program import numpy @@ -35,7 +35,7 @@ class TestShrinkRNNMemory(unittest.TestCase): self.assertTrue(numpy.allclose(tensor_np[0:1], outs[2])) mem3_mean = layers.mean(x=mem3) - append_backward_ops(loss=mem3_mean) + append_backward(loss=mem3_mean) x_grad = exe.run( feed={'x': tensor}, fetch_list=[main_program.global_block().var('x@GRAD')])[0] diff --git a/python/paddle/v2/fluid/tests/test_softmax_op.py b/python/paddle/v2/fluid/tests/test_softmax_op.py index b41c810d9a6269c934a434b085748a86deccb475..136fc0283afd6acf1de4baae5e681789662295ce 100644 --- a/python/paddle/v2/fluid/tests/test_softmax_op.py +++ b/python/paddle/v2/fluid/tests/test_softmax_op.py @@ -17,14 +17,14 @@ class TestSoftmaxOp(OpTest): 'X': np.random.uniform(0.1, 1, [10, 10]).astype("float32") } self.outputs = { - 'Y': np.apply_along_axis(stable_softmax, 1, self.inputs['X']) + 'Out': np.apply_along_axis(stable_softmax, 1, self.inputs['X']) } def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y') + self.check_grad(['X'], 'Out') if __name__ == "__main__": diff --git a/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py b/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py index 8cdd59ff3cc7deb57252fc5218d239f86016cb9c..2e4defd55d75c2012f39bea30a6c4de12528e77c 100644 --- a/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py +++ b/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py @@ -4,7 +4,7 @@ import numpy as np import paddle.v2.fluid.layers as layers from paddle.v2.fluid.framework import Program, program_guard from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.backward import append_backward_ops +from paddle.v2.fluid.backward import append_backward class TestCPULoDTensorArrayOps(unittest.TestCase): @@ -133,7 +133,7 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): in_true=out_true, in_false=out_false, mask=y, x=x, level=level) mean = layers.mean(x=out) - append_backward_ops(mean) + append_backward(mean) tensor = core.LoDTensor() tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place) diff --git a/python/paddle/v2/fluid/tests/test_uniform_random_op.py b/python/paddle/v2/fluid/tests/test_uniform_random_op.py index 00b4f196209a6414f1063a33c0e31093e33ca39d..dbe4d6bcd069d2088b3cc1b4efd575d14afd4198 100644 --- a/python/paddle/v2/fluid/tests/test_uniform_random_op.py +++ b/python/paddle/v2/fluid/tests/test_uniform_random_op.py @@ -23,10 +23,9 @@ class TestUniformRandomOp(unittest.TestCase): def test_gpu(self): if core.is_compile_gpu(): - self.uniform_random_test(place=core.GPUPlace(0)) + self.uniform_random_test(place=core.CUDAPlace(0)) def uniform_random_test(self, place): - context = core.DeviceContext.create(place) program = fluid.Program() block = program.global_block() vout = block.create_var(name="Out") diff --git a/python/paddle/v2/fluid/tests/test_while_op.py b/python/paddle/v2/fluid/tests/test_while_op.py index 033b03a4957131e1155c61e8ed2f10eefb23fda4..7c5593cc5e5a66d4ccb237e3706ff3e544adf033 100644 --- a/python/paddle/v2/fluid/tests/test_while_op.py +++ b/python/paddle/v2/fluid/tests/test_while_op.py @@ -2,7 +2,7 @@ import unittest import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor import paddle.v2.fluid.core as core -from paddle.v2.fluid.backward import append_backward_ops +from paddle.v2.fluid.backward import append_backward import numpy @@ -46,7 +46,7 @@ class TestWhileOp(unittest.TestCase): sum_result = layers.array_read(array=mem_array, i=i) loss = layers.mean(x=sum_result) - append_backward_ops(loss) + append_backward(loss) cpu = core.CPUPlace() exe = Executor(cpu) diff --git a/python/paddle/v2/reader/decorator.py b/python/paddle/v2/reader/decorator.py index 27c82c95f79e0a3e3129627bfa33d85e0d3cd862..44a6e344630bb35d28ee29078bf8727053a24bef 100644 --- a/python/paddle/v2/reader/decorator.py +++ b/python/paddle/v2/reader/decorator.py @@ -14,7 +14,7 @@ __all__ = [ 'map_readers', 'buffered', 'compose', 'chain', 'shuffle', - 'ComposeNotAligned', 'firstn', 'xmap_readers', 'pipe_reader' + 'ComposeNotAligned', 'firstn', 'xmap_readers', 'PipeReader' ] from threading import Thread @@ -334,93 +334,72 @@ def _buf2lines(buf, line_break="\n"): return lines[:-1], lines[-1] -def pipe_reader(left_cmd, - parser, - bufsize=8192, - file_type="plain", - cut_lines=True, - line_break="\n"): +class PipeReader: """ - pipe_reader read data by stream from a command, take it's - stdout into a pipe buffer and redirect it to the parser to - parse, then yield data as your desired format. + PipeReader read data by stream from a command, take it's + stdout into a pipe buffer and redirect it to the parser to + parse, then yield data as your desired format. - You can using standard linux command or call another program - to read data, from HDFS, Ceph, URL, AWS S3 etc: + You can using standard linux command or call another program + to read data, from HDFS, Ceph, URL, AWS S3 etc: - cmd = "hadoop fs -cat /path/to/some/file" - cmd = "cat sample_file.tar.gz" - cmd = "curl http://someurl" - cmd = "python print_s3_bucket.py" + .. code-block:: python + cmd = "hadoop fs -cat /path/to/some/file" + cmd = "cat sample_file.tar.gz" + cmd = "curl http://someurl" + cmd = "python print_s3_bucket.py" - A sample parser: + An example: + + .. code-block:: python - def sample_parser(lines): - # parse each line as one sample data, - # return a list of samples as batches. - ret = [] - for l in lines: - ret.append(l.split(" ")[1:5]) - return ret - - :param left_cmd: command to excute to get stdout from. - :type left_cmd: string - :param parser: parser function to parse lines of data. - if cut_lines is True, parser will receive list - of lines. - if cut_lines is False, parser will receive a - raw buffer each time. - parser should return a list of parsed values. - :type parser: callable - :param bufsize: the buffer size used for the stdout pipe. - :type bufsize: int - :param file_type: can be plain/gzip, stream buffer data type. - :type file_type: string - :param cut_lines: whether to pass lines instead of raw buffer - to the parser - :type cut_lines: bool - :param line_break: line break of the file, like \n or \r - :type line_break: string - - :return: the reader generator. - :rtype: callable + def example_reader(): + for f in myfiles: + pr = PipeReader("cat %s"%f) + for l in pr.get_line(): + sample = l.split(" ") + yield sample """ - if not isinstance(left_cmd, str): - raise TypeError("left_cmd must be a string") - if not callable(parser): - raise TypeError("parser must be a callable object") - - # TODO(typhoonzero): add a thread to read stderr - - # Always init a decompress object is better than - # create in the loop. - dec = zlib.decompressobj( - 32 + zlib.MAX_WBITS) # offset 32 to skip the header - def reader(): - process = subprocess.Popen( - left_cmd.split(" "), bufsize=bufsize, stdout=subprocess.PIPE) + def __init__(self, command, bufsize=8192, file_type="plain"): + if not isinstance(command, str): + raise TypeError("left_cmd must be a string") + if file_type == "gzip": + self.dec = zlib.decompressobj( + 32 + zlib.MAX_WBITS) # offset 32 to skip the header + self.file_type = file_type + self.bufsize = bufsize + self.process = subprocess.Popen( + command.split(" "), bufsize=bufsize, stdout=subprocess.PIPE) + + def get_line(self, cut_lines=True, line_break="\n"): + """ + :param cut_lines: cut buffer to lines + :type cut_lines: bool + :param line_break: line break of the file, like \n or \r + :type line_break: string + + :return: one line or a buffer of bytes + :rtype: string + """ remained = "" while True: - buff = process.stdout.read(bufsize) + buff = self.process.stdout.read(self.bufsize) if buff: - if file_type == "gzip": - decomp_buff = dec.decompress(buff) - elif file_type == "plain": + if self.file_type == "gzip": + decomp_buff = self.dec.decompress(buff) + elif self.file_type == "plain": decomp_buff = buff else: - raise TypeError("file_type %s is not allowed" % file_type) + raise TypeError("file_type %s is not allowed" % + self.file_type) if cut_lines: lines, remained = _buf2lines(''.join( [remained, decomp_buff]), line_break) - parsed_list = parser(lines) - for ret in parsed_list: - yield ret + for line in lines: + yield line else: - for ret in parser(decomp_buff): - yield ret + yield decomp_buff else: break - - return reader diff --git a/python/paddle/v2/reader/tests/decorator_test.py b/python/paddle/v2/reader/tests/decorator_test.py index 06e14796daf27812b9aeb1e4b024f294c7609991..4ba71969dffe7447b6c5b70aeb752a4e5469fb36 100644 --- a/python/paddle/v2/reader/tests/decorator_test.py +++ b/python/paddle/v2/reader/tests/decorator_test.py @@ -147,8 +147,11 @@ class TestXmap(unittest.TestCase): class TestPipeReader(unittest.TestCase): def test_pipe_reader(self): - def simple_parser(lines): - return lines + def example_reader(myfiles): + for f in myfiles: + pr = paddle.v2.reader.PipeReader("cat %s" % f, bufsize=128) + for l in pr.get_line(): + yield l import tempfile @@ -159,17 +162,12 @@ class TestPipeReader(unittest.TestCase): for r in records: f.write('%s\n' % r) - cmd = "cat %s" % temp.name - reader = paddle.v2.reader.pipe_reader( - cmd, simple_parser, bufsize=128) - for i in xrange(4): - result = [] - for r in reader(): - result.append(r) - - for idx, e in enumerate(records): - print e, result[idx] - self.assertEqual(e, result[idx]) + result = [] + for r in example_reader([temp.name]): + result.append(r) + + for idx, e in enumerate(records): + self.assertEqual(e, result[idx]) finally: # delete the temporary file temp.close() diff --git a/python/setup.py.in b/python/setup.py.in index 8396fb44cfcee28211b5d3db7684a4adce1fb1f6..66ccfe808763d0e157f866ce08868e3fdebdea79 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -79,8 +79,7 @@ if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']: # the prefix is sys.prefix which should always be usr paddle_bin_dir = 'opt/paddle/bin' -paddle_bins = ['${PADDLE_BINARY_DIR}/paddle/scripts/paddle_usage', - '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_trainer', +paddle_bins = ['${PADDLE_BINARY_DIR}/paddle/trainer/paddle_trainer', '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_merge_model', '${PADDLE_BINARY_DIR}/paddle/pserver/paddle_pserver_main', '${PADDLE_BINARY_DIR}/paddle/scripts/paddle']