diff --git a/CMakeLists.txt b/CMakeLists.txt index ed704585d8a6bf3befd9a549aa5a62a33fea3da9..291a960b1471b22a6cb53c4ca49b45609afb4dc6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,7 @@ option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_F option(WITH_AMD_GPU "Compile PaddlePaddle with AMD GPU" OFF) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FOUND}) +option(WITH_NGRAPH "Compile PaddlePaddle with nGraph support." OFF) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) @@ -103,6 +104,8 @@ if(ANDROID OR IOS) "Disable RDMA when cross-compiling for Android and iOS" FORCE) set(WITH_MKL OFF CACHE STRING "Disable MKL when cross-compiling for Android and iOS" FORCE) + set(WITH_NGRAPH OFF CACHE STRING + "Disable nGraph when cross-compiling for Android and iOS" FORCE) set(WITH_GOLANG OFF CACHE STRING "Disable golang when cross-compiling for Android and iOS" FORCE) @@ -171,6 +174,7 @@ include(external/protobuf) # download, build, install protobuf include(external/python) # download, build, install python include(external/openblas) # download, build, install openblas include(external/mkldnn) # download, build, install mkldnn +include(external/ngraph) # download, build, install nGraph include(external/swig) # download, build, install swig include(external/boost) # download boost include(external/any) # download libn::any diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index 9fea9ca05bce921b105a0f092c321b0c3a55c63c..785148d4f9f44032e2ce5bf93f0dc80fc865808b 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -37,7 +37,6 @@ SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib") INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR}) # For MKLDNN code to include internal headers. -INCLUDE_DIRECTORIES(${THIRD_PARTY_PATH}/install) # For Paddle code to include mkldnn.h IF(${CBLAS_PROVIDER} STREQUAL "MKLML") SET(MKLDNN_DEPENDS ${MKLML_PROJECT}) diff --git a/cmake/external/ngraph.cmake b/cmake/external/ngraph.cmake new file mode 100644 index 0000000000000000000000000000000000000000..2e335579f32df4f146c8d88e05e684a9a8105e20 --- /dev/null +++ b/cmake/external/ngraph.cmake @@ -0,0 +1,92 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_library(ngraph INTERFACE) + +IF(WIN32 OR APPLE) + MESSAGE(WARNING + "Windows or Mac is not supported with nGraph in Paddle yet." + "Force WITH_NGRAPH=OFF") + SET(WITH_NGRAPH OFF CACHE STRING "Disable nGraph in Windows and MacOS" FORCE) +ENDIF() + +IF(${WITH_NGRAPH} AND NOT ${WITH_MKLDNN}) + MESSAGE(WARNING + "nGraph needs mkl-dnn to be enabled." 
+ "Force WITH_NGRAPH=OFF") + SET(WITH_NGRAPH OFF CACHE STRING "Disable nGraph if mkl-dnn is disabled" FORCE) +ENDIF() + +IF(NOT ${WITH_NGRAPH}) + return() +ENDIF() + +INCLUDE(ExternalProject) + +SET(NGRAPH_PROJECT "extern_ngraph") +SET(NGRAPH_VERSION "0.9") +SET(NGRAPH_GIT_TAG "f9fd9d4cc318dc59dd4b68448e7fbb5f67a28bd0") +SET(NGRAPH_SOURCES_DIR ${THIRD_PARTY_PATH}/ngraph) +SET(NGRAPH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/ngraph) +SET(NGRAPH_INC_DIR ${NGRAPH_INSTALL_DIR}/include) +SET(NGRAPH_SHARED_LIB_NAME libngraph.so.${NGRAPH_VERSION}) +SET(NGRAPH_CPU_LIB_NAME libcpu_backend.so) +SET(NGRAPH_TBB_LIB_NAME libtbb.so.2) +SET(NGRAPH_GIT_REPO "https://github.com/NervanaSystems/ngraph.git") + +ExternalProject_Add( + ${NGRAPH_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + DEPENDS ${MKLDNN_PROJECT} ${MKLML_PROJECT} + GIT_REPOSITORY ${NGRAPH_GIT_REPO} + GIT_TAG ${NGRAPH_GIT_TAG} + PREFIX ${NGRAPH_SOURCES_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${NGRAPH_INSTALL_DIR} + CMAKE_ARGS -DNGRAPH_UNIT_TEST_ENABLE=FALSE + CMAKE_ARGS -DNGRAPH_TOOLS_ENABLE=FALSE + CMAKE_ARGS -DNGRAPH_INTERPRETER_ENABLE=FALSE + CMAKE_ARGS -DNGRAPH_DEX_ONLY=TRUE + CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + CMAKE_ARGS -DMKLDNN_INCLUDE_DIR=${MKLDNN_INC_DIR} + CMAKE_ARGS -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/lib +) + +if(UNIX AND NOT APPLE) + include(GNUInstallDirs) + SET(NGRAPH_LIB_DIR ${NGRAPH_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}) +else() + SET(NGRAPH_LIB_DIR ${NGRAPH_INSTALL_DIR}/lib) +endif() +MESSAGE(STATUS "nGraph lib will be installed at: ${NGRAPH_LIB_DIR}") + +SET(NGRAPH_SHARED_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_SHARED_LIB_NAME}) +SET(NGRAPH_CPU_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_CPU_LIB_NAME}) +SET(NGRAPH_TBB_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_TBB_LIB_NAME}) + +# Workaround for nGraph expecting mklml to be in mkldnn install directory. +ExternalProject_Add_Step( + ${NGRAPH_PROJECT} + PrepareMKL + COMMAND ${CMAKE_COMMAND} -E create_symlink ${MKLML_LIB} ${MKLDNN_INSTALL_DIR}/lib/libmklml_intel.so + COMMAND ${CMAKE_COMMAND} -E create_symlink ${MKLML_IOMP_LIB} ${MKLDNN_INSTALL_DIR}/lib/libiomp5.so + DEPENDEES download + DEPENDERS configure +) + +add_dependencies(ngraph ${NGRAPH_PROJECT}) +target_compile_definitions(ngraph INTERFACE -DPADDLE_WITH_NGRAPH) +target_include_directories(ngraph INTERFACE ${NGRAPH_INC_DIR}) +target_link_libraries(ngraph INTERFACE ${NGRAPH_SHARED_LIB}) +LIST(APPEND external_project_dependencies ngraph) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 550b0dada8e90c1e2b33705fd53c065672113b45..45ef9b4550291cadaa9571f05dbaefdf4a0c223a 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -30,66 +30,61 @@ UNSET_VAR(PROTOBUF_LITE_LIBRARY) UNSET_VAR(PROTOBUF_LIBRARY) UNSET_VAR(PROTOBUF_INCLUDE_DIR) UNSET_VAR(Protobuf_PROTOC_EXECUTABLE) +function(protobuf_generate_python SRCS) + # shameless copy from https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake + if(NOT ARGN) + message(SEND_ERROR "Error: PROTOBUF_GENERATE_PYTHON() called without any proto files") + return() + endif() -if(NOT COMMAND protobuf_generate_python) # before cmake 3.4, protobuf_genrerate_python is not defined. 
- function(protobuf_generate_python SRCS) - # shameless copy from https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake - if(NOT ARGN) - message(SEND_ERROR "Error: PROTOBUF_GENERATE_PYTHON() called without any proto files") - return() - endif() - - if(PROTOBUF_GENERATE_CPP_APPEND_PATH) - # Create an include path for each file specified - foreach(FIL ${ARGN}) - get_filename_component(ABS_FIL ${FIL} ABSOLUTE) - get_filename_component(ABS_PATH ${ABS_FIL} PATH) - list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) - if(${_contains_already} EQUAL -1) - list(APPEND _protobuf_include_path -I ${ABS_PATH}) - endif() - endforeach() - else() - set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR}) - endif() - - if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS) - set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}") - endif() - - if(DEFINED Protobuf_IMPORT_DIRS) - foreach(DIR ${Protobuf_IMPORT_DIRS}) - get_filename_component(ABS_PATH ${DIR} ABSOLUTE) - list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) - if(${_contains_already} EQUAL -1) - list(APPEND _protobuf_include_path -I ${ABS_PATH}) - endif() - endforeach() - endif() - - set(${SRCS}) + if(PROTOBUF_GENERATE_CPP_APPEND_PATH) + # Create an include path for each file specified foreach(FIL ${ARGN}) get_filename_component(ABS_FIL ${FIL} ABSOLUTE) - get_filename_component(FIL_WE ${FIL} NAME_WE) - if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH) - get_filename_component(FIL_DIR ${FIL} DIRECTORY) - if(FIL_DIR) - set(FIL_WE "${FIL_DIR}/${FIL_WE}") - endif() + get_filename_component(ABS_PATH ${ABS_FIL} PATH) + list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) + if(${_contains_already} EQUAL -1) + list(APPEND _protobuf_include_path -I ${ABS_PATH}) endif() + endforeach() + else() + set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR}) + endif() + if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS) + set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}") + endif() - list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py") - add_custom_command( - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py" - COMMAND ${Protobuf_PROTOC_EXECUTABLE} --python_out ${CMAKE_CURRENT_BINARY_DIR} ${_protobuf_include_path} ${ABS_FIL} - DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE} - COMMENT "Running Python protocol buffer compiler on ${FIL}" - VERBATIM ) + if(DEFINED Protobuf_IMPORT_DIRS) + foreach(DIR ${Protobuf_IMPORT_DIRS}) + get_filename_component(ABS_PATH ${DIR} ABSOLUTE) + list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) + if(${_contains_already} EQUAL -1) + list(APPEND _protobuf_include_path -I ${ABS_PATH}) + endif() endforeach() + endif() - set(${SRCS} ${${SRCS}} PARENT_SCOPE) - endfunction() -endif() + set(${SRCS}) + foreach(FIL ${ARGN}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(FIL_WE ${FIL} NAME_WE) + if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH) + get_filename_component(FIL_DIR ${FIL} DIRECTORY) + if(FIL_DIR) + set(FIL_WE "${FIL_DIR}/${FIL_WE}") + endif() + endif() + list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py") + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py" + COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --python_out ${CMAKE_CURRENT_BINARY_DIR} ${_protobuf_include_path} ${ABS_FIL} + DEPENDS ${ABS_FIL} ${PROTOBUF_PROTOC_EXECUTABLE} + COMMENT "Running Python protocol buffer compiler on ${FIL}" + VERBATIM ) + endforeach() + + set(${SRCS} ${${SRCS}} PARENT_SCOPE) +endfunction() 
# Print and set the protobuf library information, # finish this cmake process and exit from this file. @@ -126,6 +121,7 @@ macro(PROMPT_PROTOBUF_LIB) # FIND_Protobuf.cmake uses `Protobuf_PROTOC_EXECUTABLE`. # make `protobuf_generate_cpp` happy. SET(Protobuf_PROTOC_EXECUTABLE ${PROTOBUF_PROTOC_EXECUTABLE}) + FOREACH(dep ${protobuf_DEPS}) ADD_DEPENDENCIES(protobuf ${dep}) ADD_DEPENDENCIES(protobuf_lite ${dep}) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index f58131e75bebc5633ae01299a1f2084b63e0c8b9..3378d210cdf6a625e11b1dd5fe348aa04cdb9361 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -103,7 +103,7 @@ paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 's paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.layer_norm ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None)) -paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode'], varargs=None, keywords=None, defaults=(False, -100, False)) +paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, False, False)) paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None)) paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.autoincreased_step_counter ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1)) @@ -179,10 +179,12 @@ paddle.fluid.layers.space_to_depth ArgSpec(args=['x', 'blocksize', 'name'], vara paddle.fluid.layers.affine_grid ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.sequence_reverse ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None)) +paddle.fluid.layers.similarity_focus ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)) paddle.fluid.layers.grid_sampler ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.log_loss ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None)) paddle.fluid.layers.add_position_encoding ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.bilinear_tensor_product ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None)) paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 
'float32', 0, VarType.LOD_TENSOR, True)) paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)) paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None) @@ -201,6 +203,7 @@ paddle.fluid.layers.create_tensor ArgSpec(args=['dtype', 'name', 'persistable'], paddle.fluid.layers.create_parameter ArgSpec(args=['shape', 'dtype', 'name', 'attr', 'is_bias', 'default_initializer'], varargs=None, keywords=None, defaults=(None, None, False, None)) paddle.fluid.layers.create_global_var ArgSpec(args=['shape', 'value', 'dtype', 'persistable', 'force_cpu', 'name'], varargs=None, keywords=None, defaults=(False, False, None)) paddle.fluid.layers.cast ArgSpec(args=['x', 'dtype'], varargs=None, keywords=None, defaults=None) +paddle.fluid.layers.tensor_array_to_tensor ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None)) paddle.fluid.layers.concat ArgSpec(args=['input', 'axis', 'name'], varargs=None, keywords=None, defaults=(0, None)) paddle.fluid.layers.sums ArgSpec(args=['input', 'out'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.assign ArgSpec(args=['input', 'output'], varargs=None, keywords=None, defaults=(None,)) @@ -271,6 +274,7 @@ paddle.fluid.layers.hard_shrink ArgSpec(args=['x', 'threshold'], varargs=None, k paddle.fluid.layers.cumsum ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], varargs=None, keywords=None, defaults=(None, None, None)) paddle.fluid.layers.thresholded_relu ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.prior_box ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False)) +paddle.fluid.layers.density_prior_box ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, None)) paddle.fluid.layers.multi_box_head ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)) paddle.fluid.layers.bipartite_match ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None)) paddle.fluid.layers.target_assign ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)) diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 0a3c8a6cb5c4187063db8ec2decd1f8d55aa39ea..b534a5509279ef7bfc5fc92ec726224e6c5ed16f 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -167,10 +167,12 @@ struct HitGroup { bool Match(Node *node, PDNode *pat) { if (nodes_.count(node)) { - if 
(!roles.count(pat)) return false; - return roles[pat] == node; + if (roles.count(pat) && roles[pat] == node) return true; + return false; + } else { + if (roles.count(pat) && roles[pat] != node) return false; + return true; } - return !roles.count(pat) || roles.at(pat) == node; } void Register(Node *node, PDNode *pat) { @@ -198,7 +200,6 @@ GraphPatternDetector::DetectPatterns() { std::vector result; std::vector init_groups; std::array, 2> bi_records; - // PADDLE_ENFORCE(!pattern_.edges().empty(), "At least one edge is needed"); auto *first_pnode = pattern_.edges().empty() ? pattern().nodes().front().get() : pattern_.edges().front().first; if (!pdnodes2nodes_.count(first_pnode)) return result; @@ -228,11 +229,12 @@ GraphPatternDetector::DetectPatterns() { VLOG(80) << "check " << source->id() << " -- " << target->id(); // TODO(Superjomn) add some prune strategies. for (const auto &group : pre_groups) { - HitGroup new_group = group; - if (IsNodesLink(source, target) && - new_group.Match(source, edge.first)) { - new_group.Register(source, edge.first); - if (new_group.Match(target, edge.second)) { + if (IsNodesLink(source, target)) { + HitGroup new_group = group; + bool flag = new_group.Match(source, edge.first) && + new_group.Match(target, edge.second); + if (flag) { + new_group.Register(source, edge.first); new_group.Register(target, edge.second); cur_groups.push_back(new_group); // TODO(Superjomn) need to unique @@ -261,14 +263,16 @@ GraphPatternDetector::DetectPatterns() { return result; } -bool GraphItemCMP(const std::pair &a, +struct GraphItemLessThan { + bool operator()(const std::pair &a, const std::pair &b) { - if (a.first != b.first) { - return a.first < b.first; - } else { - return a.second < b.second; + if (a.first != b.first) { + return a.first < b.first; + } else { + return a.second < b.second; + } } -} +}; // TODO(Superjomn) enhance the function as it marks unique unique as duplicates // see https://github.com/PaddlePaddle/Paddle/issues/13550 @@ -282,7 +286,7 @@ void GraphPatternDetector::UniquePatterns( for (auto &g : *subgraphs) { // Sort the items in the sub-graph, and transform to a string key. std::vector> sorted_keys(g.begin(), g.end()); - std::sort(sorted_keys.begin(), sorted_keys.end(), GraphItemCMP); + std::sort(sorted_keys.begin(), sorted_keys.end(), GraphItemLessThan()); std::stringstream ss; for (auto &item : sorted_keys) { ss << item.first << ":" << item.second; diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 0506907ab56bd4eeb720c43601dcf20e6ebc9d67..5624878d439873e5f6aee6ec9234e31d5c77ff97 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -259,6 +259,8 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const { if (row_size >= 0) { ss << "[row_size=" << row_size << "]"; } + std::string dtype = GetDtype(*scope, output.second[i]); + ss << ":" << dtype; ss << "[" << GetDims(*scope, var_name, true) << "]"; ss << "(" << GetLoD(*scope, var_name) << ")"; } diff --git a/paddle/fluid/framework/var_type_inference.h b/paddle/fluid/framework/var_type_inference.h index f3035cd712bdea517068b4c172bb2794d5fccddb..64236b78d2e390ea5f6c43c76a4b33b62c67629f 100644 --- a/paddle/fluid/framework/var_type_inference.h +++ b/paddle/fluid/framework/var_type_inference.h @@ -13,6 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once +#include +#include "paddle/fluid/framework/block_desc.h" +#include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/type_defs.h" namespace paddle { @@ -24,5 +27,27 @@ class VarTypeInference { virtual void operator()(const OpDesc& op_desc, BlockDesc* block) const = 0; }; +class PassInDtypeAndVarTypeToOutput : public framework::VarTypeInference { + public: + void operator()(const framework::OpDesc& op_desc, + framework::BlockDesc* block) const final { + auto in_out_var_names = this->GetInputOutputWithSameType(); + + for (auto& i_o_n : in_out_var_names) { + auto& x_name = op_desc.Input(i_o_n.first).at(0); + auto& out_name = op_desc.Output(i_o_n.second).at(0); + + auto& x = block->FindRecursiveOrCreateVar(x_name); + auto& out = block->FindRecursiveOrCreateVar(out_name); + out.SetType(x.GetType()); + out.SetDataType(x.GetDataType()); + } + } + + protected: + virtual std::unordered_map + GetInputOutputWithSameType() const = 0; +}; + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc index cd6636a7ebd94814714fa61baa4f3c12192ffb6c..60887830144ceab75c6b7134be82609ba2d78f38 100644 --- a/paddle/fluid/inference/analysis/analyzer.cc +++ b/paddle/fluid/inference/analysis/analyzer.cc @@ -113,7 +113,9 @@ void Analyzer::Run(Argument* argument) { passes.push_back("infer_clean_graph_pass"); passes.push_back("graph_viz_pass"); // add graphviz for debug. for (auto& pass : ir_passes_) { - if (!disabled_ir_passes_.count(pass)) { + // skip mkldnn pass when use_mkldnn_ = false; + bool skip_pass = (!use_mkldnn_) && pass.find("mkldnn") != std::string::npos; + if (!disabled_ir_passes_.count(pass) && !skip_pass) { passes.push_back(pass); passes.push_back("graph_viz_pass"); // add graphviz for debug. } diff --git a/paddle/fluid/inference/analysis/data_flow_graph.cc b/paddle/fluid/inference/analysis/data_flow_graph.cc index bdcb30f159ec17a8d3b23d020b2ea082e71ace1b..545017da07f8e414f21a02c35fca96aba6de41aa 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph.cc +++ b/paddle/fluid/inference/analysis/data_flow_graph.cc @@ -112,8 +112,8 @@ void DataFlowGraph::Build(const framework::proto::ProgramDesc &prog) { out_alias->SetPbMsg(out->pb_msg()); var2id[out_alias->name()] = out_alias->id(); // update variable's alias Node - LOG(INFO) << "loop found in graph, create SSA alias node [" - << out_alias->repr() << "] for [" << out->repr() << "]"; + VLOG(40) << "loop found in graph, create SSA alias node [" + << out_alias->repr() << "] for [" << out->repr() << "]"; out = out_alias; } out->inlinks.push_back(o); diff --git a/paddle/fluid/inference/tensorrt/convert/activation_op.cc b/paddle/fluid/inference/tensorrt/convert/activation_op.cc index e73c5bbf57501e4ff3c080a46d91685035652bfa..0b756534ec6fbf27a3e92bf39fb7544d9785ca48 100644 --- a/paddle/fluid/inference/tensorrt/convert/activation_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/activation_op.cc @@ -27,7 +27,7 @@ class ActivationOpConverter : public OpConverter { // Here the two nullptr looks strange, that's because the // framework::OpDesc's constructor is strange. 
framework::OpDesc op_desc(op, nullptr); - LOG(INFO) + VLOG(3) << "convert a fluid Activation op to tensorrt activation layer whose " "type is " << op_type_; diff --git a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc index 3330af2da6c97ad153dcecd86be4b441eac62b5e..d017bac66dd99a4b54c44ec786de61d1e66b8981 100644 --- a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc @@ -23,7 +23,7 @@ class BatchNormOpConverter : public OpConverter { public: void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { - LOG(INFO) << "convert a fluid batch norm op to tensorrt batch_norm"; + VLOG(3) << "convert a fluid batch norm op to tensorrt batch_norm"; framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); diff --git a/paddle/fluid/inference/tensorrt/convert/concat_op.cc b/paddle/fluid/inference/tensorrt/convert/concat_op.cc index cd1bb892bdf0ccb69114ed51accad40d263581d5..525ba9dc341c8c1343553ac9523611f79ac3aa2d 100644 --- a/paddle/fluid/inference/tensorrt/convert/concat_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/concat_op.cc @@ -25,7 +25,7 @@ class ConcatOpConverter : public OpConverter { public: void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { - VLOG(40) << "convert a fluid mul op to tensorrt mul layer without bias"; + VLOG(3) << "convert a fluid mul op to tensorrt mul layer without bias"; framework::OpDesc op_desc(op, nullptr); // Declare inputs diff --git a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc index 7bcf2dd1eeb17e802c5647df31945284ae08fa95..43950b8c048b4e1b8974956948caa639812b2f78 100644 --- a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc @@ -37,8 +37,7 @@ class Conv2dOpConverter : public OpConverter { public: void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { - LOG(INFO) - << "convert a fluid conv2d op to tensorrt conv layer without bias"; + VLOG(3) << "convert a fluid conv2d op to tensorrt conv layer without bias"; framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1); diff --git a/paddle/fluid/inference/tensorrt/convert/dropout_op.cc b/paddle/fluid/inference/tensorrt/convert/dropout_op.cc index df86a68dac521257aadac5ab1ed4020475a382f3..ddbc724e3b2a48b75df17f9bda691a1fd3883c32 100644 --- a/paddle/fluid/inference/tensorrt/convert/dropout_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/dropout_op.cc @@ -25,7 +25,7 @@ class DropoutOpConverter : public OpConverter { public: void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { - VLOG(40) << "convert a fluid dropout op to tensorrt dropout layer"; + VLOG(3) << "convert a fluid dropout op to tensorrt dropout layer"; framework::OpDesc op_desc(op, nullptr); // Declare inputs auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]); diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc index 0a6ce568f194f03c7259e1ebf28dd6ce4df2d594..671bcd8aa9a9fff34644a056499961cf6ab81287 100644 --- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc 
@@ -26,7 +26,7 @@ class ElementwiseWeightOpConverter : public OpConverter { // Here the two nullptr looks strange, that's because the // framework::OpDesc's constructor is strange. framework::OpDesc op_desc(op, nullptr); - LOG(INFO) << "convert a fluid elementwise op to tensorrt IScaleLayer"; + VLOG(3) << "convert a fluid elementwise op to tensorrt IScaleLayer"; PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight @@ -108,7 +108,7 @@ class ElementwiseTensorOpConverter : public OpConverter { // Here the two nullptr looks strange, that's because the // framework::OpDesc's constructor is strange. framework::OpDesc op_desc(op, nullptr); - LOG(INFO) << "convert a fluid elementwise op to tensorrt IScaleLayer"; + VLOG(3) << "convert a fluid elementwise op to tensorrt IScaleLayer"; PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight diff --git a/paddle/fluid/inference/tensorrt/convert/fc_op.cc b/paddle/fluid/inference/tensorrt/convert/fc_op.cc index bc1d9ee2811c447cd84f7e5b415b6bd53433b986..eef4fab4e86f05fa80bc614371f1aa43e433407e 100644 --- a/paddle/fluid/inference/tensorrt/convert/fc_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/fc_op.cc @@ -52,7 +52,7 @@ class FcOpConverter : public OpConverter { public: void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { - VLOG(40) << "convert a fluid fc op to tensorrt fc layer without bias"; + VLOG(3) << "convert a fluid fc op to tensorrt fc layer without bias"; framework::OpDesc op_desc(op, nullptr); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); diff --git a/paddle/fluid/inference/tensorrt/convert/mul_op.cc b/paddle/fluid/inference/tensorrt/convert/mul_op.cc index babd56d62392299a58ecc16b7441029f80022498..5b6aaad49833cedbd8d1ee0ec5d24c7f983190e6 100644 --- a/paddle/fluid/inference/tensorrt/convert/mul_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/mul_op.cc @@ -25,7 +25,7 @@ class MulOpConverter : public OpConverter { public: void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { - VLOG(40) << "convert a fluid mul op to tensorrt mul layer without bias"; + VLOG(3) << "convert a fluid mul op to tensorrt mul layer without bias"; framework::OpDesc op_desc(op, nullptr); // Declare inputs diff --git a/paddle/fluid/inference/tensorrt/convert/pad_op.cc b/paddle/fluid/inference/tensorrt/convert/pad_op.cc index c3699428d29e4d21f4eafbcf1fd0e255c91fabe6..4afcb0aecec9d07b52d2fd701fae8750067a6041 100644 --- a/paddle/fluid/inference/tensorrt/convert/pad_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/pad_op.cc @@ -25,7 +25,7 @@ class PadOpConverter : public OpConverter { public: void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { - VLOG(40) << "convert a fluid transpose op to tensorrt tranpose layer"; + VLOG(3) << "convert a fluid pad op to tensorrt pad layer"; framework::OpDesc op_desc(op, nullptr); // Declare inputs diff --git a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc index d943d699f2cbe6726b3e634d0bd08e7cff3057d6..48850020840a49bd309c007943f14b2f7eec5e2d 100644 --- a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc @@ -25,7 +25,7 @@ class Pool2dOpConverter : public OpConverter { public: void operator()(const
framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { - VLOG(40) + VLOG(3) << "convert a fluid pool2d op to tensorrt pool2d layer without bias"; framework::OpDesc op_desc(op, nullptr); // Declare inputs diff --git a/paddle/fluid/inference/tensorrt/convert/softmax_op.cc b/paddle/fluid/inference/tensorrt/convert/softmax_op.cc index 174cdbe53b2698d05b680d27f1f94bee38d1dfa5..80bfb2d190a5637032e7c18fbac7f22b3a9e81e1 100644 --- a/paddle/fluid/inference/tensorrt/convert/softmax_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/softmax_op.cc @@ -25,7 +25,7 @@ class SoftMaxOpConverter : public OpConverter { public: void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { - VLOG(40) + VLOG(3) << "convert a fluid softmax op to tensorrt softmax layer without bias"; framework::OpDesc op_desc(op, nullptr); // Declare inputs diff --git a/paddle/fluid/inference/tensorrt/helper.h b/paddle/fluid/inference/tensorrt/helper.h index b6e7968108403c9c9c192759c44eac040d1c5073..fc7ca7714e9325d2b6bce6189300aa339c81c2ba 100644 --- a/paddle/fluid/inference/tensorrt/helper.h +++ b/paddle/fluid/inference/tensorrt/helper.h @@ -52,7 +52,7 @@ class NaiveLogger : public nvinfer1::ILogger { void log(nvinfer1::ILogger::Severity severity, const char* msg) override { switch (severity) { case Severity::kINFO: - LOG(INFO) << msg; + VLOG(3) << msg; break; case Severity::kWARNING: LOG(WARNING) << msg; diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 2ca84c80058b35840aff5d072cdc99ecf5165f8e..5287cd51cd2c339601b91b6a5e9ad4b9b1f5ee48 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -110,5 +110,5 @@ if(WITH_GPU AND TENSORRT_FOUND) endif() cc_test(test_trt_models SRCS trt_models_tester.cc ARGS --dirname=${TRT_MODEL_INSTALL_DIR}/trt_test_models - DEPS paddle_inference_tensorrt_subgraph_engine) + DEPS paddle_inference_tensorrt_subgraph_engine SERIAL) endif() diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 2a7de024bf4ec1f49f7672de782f68ba8b353bbd..776bdfaee8ac24b066b95328fdb59d240f16a446 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -5,6 +5,8 @@ list(REMOVE_DUPLICATES GENERAL_OPS) set(DEPS_OPS "") set(pybind_file ${PADDLE_BINARY_DIR}/paddle/fluid/pybind/pybind.h) file(WRITE ${pybind_file} "// Generated by the paddle/fluid/operator/CMakeLists.txt. DO NOT EDIT!\n\n") + +set(PART_CUDA_KERNEL_FILES) function(op_library TARGET) # op_library is a function to create op library. The interface is same as # cc_library. 
But it handle split GPU/CPU code and link some common library @@ -37,6 +39,12 @@ function(op_library TARGET) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) list(APPEND cu_srcs ${TARGET}.cu) endif() + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu) + set(PART_CUDA_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu + ${PART_CUDA_KERNEL_FILES} PARENT_SCOPE) + list(APPEND cu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu) + endif() + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu) list(APPEND hip_cu_srcs ${TARGET}.hip.cu) endif() @@ -317,6 +325,7 @@ op_library(save_op DEPS lod_tensor) op_library(load_op DEPS lod_tensor) op_library(save_combine_op DEPS lod_tensor) op_library(load_combine_op DEPS lod_tensor) +op_library(tensor_array_to_tensor_op DEPS concat_op) op_library(concat_op DEPS concat_and_split) list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) @@ -326,6 +335,8 @@ foreach(src ${GENERAL_OPS}) endforeach() file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(logical_and);\nUSE_NO_KERNEL_OP(read_from_array);\n") + + if (NOT WIN32) add_subdirectory(reader) endif(NOT WIN32) @@ -352,3 +363,14 @@ if(NOT WIN32) nv_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context) endif() nv_test(dropout_op_test SRCS dropout_op_test.cc DEPS dropout_op tensor) + +if(WITH_GPU) + foreach(CUDA_KERNEL_FILE ${PART_CUDA_KERNEL_FILES}) + file(READ ${CUDA_KERNEL_FILE} TARGET_CONTENT) + string(REGEX MATCH "REGISTER_OP_CUDA_KERNEL\\(\\n?([^,]+),.*" MATCHED ${TARGET_CONTENT}) + if (MATCHED) + string(STRIP ${CMAKE_MATCH_1} MATCHED) + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${MATCHED}, CUDA);\n") + endif() + endforeach() +endif() diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 9ddb3a5d29f973047507855b43b226913a3600b5..ea260a3e92b775023085fd02eec33e6ecfaf2e81 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -91,16 +91,12 @@ class ActivationOp : public framework::OperatorWithKernel { } }; -class ActivationOpInferVarType : public framework::VarTypeInference { - public: - void operator()(const framework::OpDesc& op_desc, - framework::BlockDesc* block) const override { - auto x_name = op_desc.Input("X")[0]; - auto out_name = op_desc.Output("Out")[0]; - auto& x = block->FindRecursiveOrCreateVar(x_name); - auto& out = block->FindRecursiveOrCreateVar(out_name); - out.SetType(x.GetType()); - out.SetDataType(x.GetDataType()); +class ActivationOpInferVarType + : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Out"}}; } }; diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index 3eb473832577bd348b33ba9b0be9e597b78f26bc..cf245f5038f5f5ad1b623542aa14686eff8aad32 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -170,6 +170,15 @@ The required data format for this layer is one of the following: } }; +class BatchNormOpInferVarType + : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Y"}}; + } +}; + template class BatchNormKernel : public framework::OpKernel { @@ -525,7 +534,7 @@ class BatchNormGradMaker : public framework::SingleGradOpDescMaker { namespace ops = paddle::operators; REGISTER_OPERATOR(batch_norm, ops::BatchNormOp, 
ops::BatchNormOpMaker, - ops::BatchNormGradMaker); + ops::BatchNormOpInferVarType, ops::BatchNormGradMaker); REGISTER_OPERATOR(batch_norm_grad, ops::BatchNormGradOp); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index 7401f100d723e0d331f403caff336620305879e1..4d370746382a4247f51aafa189e86eece941c320 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -224,6 +224,15 @@ $$ )DOC"); } +class ConvOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{ + {"Input", /*->*/ "Output"}}; + } +}; + void Conv3DOpMaker::Make() { AddInput( "Input", @@ -365,6 +374,7 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType( namespace ops = paddle::operators; REGISTER_OPERATOR(conv2d, ops::ConvOp, ops::Conv2DOpMaker, + ops::ConvOpInferVarType, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad); @@ -372,7 +382,9 @@ REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad); REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad); + REGISTER_OPERATOR(conv3d, ops::ConvOp, ops::Conv3DOpMaker, + ops::ConvOpInferVarType, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(conv3d_grad, ops::ConvOpGrad); diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc index 66f19fe7ecfa51b2ce917f0c5fcb6d486f1a7307..a904dd91302c951560dc32ac107d4d73b6024c25 100644 --- a/paddle/fluid/operators/cross_entropy_op.cc +++ b/paddle/fluid/operators/cross_entropy_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/cross_entropy_op.h" +#include namespace paddle { namespace operators { @@ -179,6 +180,15 @@ or not. But the output only shares the LoD information with input X. 
)DOC"); } }; + +class CrossEntropyOpInferVarType + : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Y"}}; + } +}; } // namespace operators } // namespace paddle @@ -186,6 +196,7 @@ namespace ops = paddle::operators; using CPUCtx = paddle::platform::CPUDeviceContext; REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOp, ops::CrossEntropyOpMaker, + ops::CrossEntropyOpInferVarType, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(cross_entropy_grad, ops::CrossEntropyGradientOp); REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel, diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index d5eec148f9b4f76866ec9fca98a596b9bc2860ef..e5c3f0eeb385e1a15fdbb12a989603996420efe3 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -22,6 +22,7 @@ iou_similarity_op.cu) detection_library(mine_hard_examples_op SRCS mine_hard_examples_op.cc) detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc poly_util.cc gpc.cc) detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op.cu) +detection_library(density_prior_box_op SRCS density_prior_box_op.cc) detection_library(anchor_generator_op SRCS anchor_generator_op.cc anchor_generator_op.cu) detection_library(target_assign_op SRCS target_assign_op.cc diff --git a/paddle/fluid/operators/detection/density_prior_box_op.cc b/paddle/fluid/operators/detection/density_prior_box_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..99df15c3226b4305a28a3912398d6d1c766daa73 --- /dev/null +++ b/paddle/fluid/operators/detection/density_prior_box_op.cc @@ -0,0 +1,175 @@ +/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/detection/density_prior_box_op.h" + +namespace paddle { +namespace operators { + +class DensityPriorBoxOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Input"), + "Input(Input) of DensityPriorBoxOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Image"), + "Input(Image) of DensityPriorBoxOp should not be null."); + + auto image_dims = ctx->GetInputDim("Image"); + auto input_dims = ctx->GetInputDim("Input"); + PADDLE_ENFORCE(image_dims.size() == 4, "The layout of image is NCHW."); + PADDLE_ENFORCE(input_dims.size() == 4, "The layout of input is NCHW."); + + PADDLE_ENFORCE_LT(input_dims[2], image_dims[2], + "The height of input must be smaller than that of image."); + + PADDLE_ENFORCE_LT(input_dims[3], image_dims[3], + "The width of input must be smaller than that of image."); + auto variances = ctx->Attrs().Get>("variances"); + + auto fixed_sizes = ctx->Attrs().Get>("fixed_sizes"); + auto fixed_ratios = ctx->Attrs().Get>("fixed_ratios"); + auto densities = ctx->Attrs().Get>("densities"); + + PADDLE_ENFORCE_EQ(fixed_sizes.size(), densities.size(), + "The number of fixed_sizes and densities must be equal."); + size_t num_priors = 0; + if ((fixed_sizes.size() > 0) && (densities.size() > 0)) { + for (size_t i = 0; i < densities.size(); ++i) { + if (fixed_ratios.size() > 0) { + num_priors += (fixed_ratios.size()) * (pow(densities[i], 2)); + } + } + } + std::vector dim_vec(4); + dim_vec[0] = input_dims[2]; + dim_vec[1] = input_dims[3]; + dim_vec[2] = num_priors; + dim_vec[3] = 4; + ctx->SetOutputDim("Boxes", framework::make_ddim(dim_vec)); + ctx->SetOutputDim("Variances", framework::make_ddim(dim_vec)); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("Input")->type()), + platform::CPUPlace()); + } +}; + +class DensityPriorBoxOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput( + "Input", + "(Tensor, default Tensor), " + "the input feature data of DensityPriorBoxOp, the layout is NCHW."); + AddInput("Image", + "(Tensor, default Tensor), " + "the input image data of DensityPriorBoxOp, the layout is NCHW."); + AddOutput("Boxes", + "(Tensor, default Tensor), the output prior boxes of " + "DensityPriorBoxOp. The layout is [H, W, num_priors, 4]. " + "H is the height of input, W is the width of input, num_priors " + "is the box count of each position."); + AddOutput("Variances", + "(Tensor, default Tensor), the expanded variances of " + "DensityPriorBoxOp. The layout is [H, W, num_priors, 4].
" + "H is the height of input, W is the width of input, num_priors " + "is the box count of each position."); + AddAttr>("variances", + "(vector) List of variances to be " + "encoded in density prior boxes.") + .AddCustomChecker([](const std::vector& variances) { + PADDLE_ENFORCE_EQ(variances.size(), 4, + "Must and only provide 4 variance."); + for (size_t i = 0; i < variances.size(); ++i) { + PADDLE_ENFORCE_GT(variances[i], 0.0, + "variance[%d] must be greater than 0.", i); + } + }); + AddAttr("clip", "(bool) Whether to clip out-of-boundary boxes.") + .SetDefault(true); + + AddAttr( + "step_w", + "Density prior boxes step across width, 0.0 for auto calculation.") + .SetDefault(0.0) + .AddCustomChecker([](const float& step_w) { + PADDLE_ENFORCE_GE(step_w, 0.0, "step_w should be larger than 0."); + }); + AddAttr( + "step_h", + "Density prior boxes step across height, 0.0 for auto calculation.") + .SetDefault(0.0) + .AddCustomChecker([](const float& step_h) { + PADDLE_ENFORCE_GE(step_h, 0.0, "step_h should be larger than 0."); + }); + + AddAttr("offset", + "(float) " + "Density prior boxes center offset.") + .SetDefault(0.5); + AddAttr>("fixed_sizes", + "(vector) List of fixed sizes " + "of generated density prior boxes.") + .SetDefault(std::vector{}) + .AddCustomChecker([](const std::vector& fixed_sizes) { + for (size_t i = 0; i < fixed_sizes.size(); ++i) { + PADDLE_ENFORCE_GT(fixed_sizes[i], 0.0, + "fixed_sizes[%d] should be larger than 0.", i); + } + }); + + AddAttr>("fixed_ratios", + "(vector) List of fixed ratios " + "of generated density prior boxes.") + .SetDefault(std::vector{}) + .AddCustomChecker([](const std::vector& fixed_ratios) { + for (size_t i = 0; i < fixed_ratios.size(); ++i) { + PADDLE_ENFORCE_GT(fixed_ratios[i], 0.0, + "fixed_ratios[%d] should be larger than 0.", i); + } + }); + + AddAttr>("densities", + "(vector) List of densities " + "of generated density prior boxes.") + .SetDefault(std::vector{}) + .AddCustomChecker([](const std::vector& densities) { + for (size_t i = 0; i < densities.size(); ++i) { + PADDLE_ENFORCE_GT(densities[i], 0, + "densities[%d] should be larger than 0.", i); + } + }); + AddComment(R"DOC( + Density Prior box operator + Each position of the input produce N density prior boxes, N is determined by + the count of fixed_ratios, densities, the calculation of N is as follows: + for density in densities: + N += size(fixed_ratios)*density^2 + )DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(density_prior_box, ops::DensityPriorBoxOp, + ops::DensityPriorBoxOpMaker, + paddle::framework::EmptyGradOpMaker); + +REGISTER_OP_CPU_KERNEL(density_prior_box, ops::DensityPriorBoxOpKernel, + ops::DensityPriorBoxOpKernel); diff --git a/paddle/fluid/operators/detection/density_prior_box_op.h b/paddle/fluid/operators/detection/density_prior_box_op.h new file mode 100644 index 0000000000000000000000000000000000000000..9a52077e9cf90b278549a077af161bd4e282d972 --- /dev/null +++ b/paddle/fluid/operators/detection/density_prior_box_op.h @@ -0,0 +1,146 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include +#include "paddle/fluid/operators/detection/prior_box_op.h" + +namespace paddle { +namespace operators { + +template +class DensityPriorBoxOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* input = ctx.Input("Input"); + auto* image = ctx.Input("Image"); + auto* boxes = ctx.Output("Boxes"); + auto* vars = ctx.Output("Variances"); + + auto variances = ctx.Attr>("variances"); + auto clip = ctx.Attr("clip"); + + auto fixed_sizes = ctx.Attr>("fixed_sizes"); + auto fixed_ratios = ctx.Attr>("fixed_ratios"); + auto densities = ctx.Attr>("densities"); + + T step_w = static_cast(ctx.Attr("step_w")); + T step_h = static_cast(ctx.Attr("step_h")); + T offset = static_cast(ctx.Attr("offset")); + + auto img_width = image->dims()[3]; + auto img_height = image->dims()[2]; + + auto feature_width = input->dims()[3]; + auto feature_height = input->dims()[2]; + + T step_width, step_height; + if (step_w == 0 || step_h == 0) { + step_width = static_cast(img_width) / feature_width; + step_height = static_cast(img_height) / feature_height; + } else { + step_width = step_w; + step_height = step_h; + } + int num_priors = 0; + if (fixed_sizes.size() > 0 && densities.size() > 0) { + for (size_t i = 0; i < densities.size(); ++i) { + if (fixed_ratios.size() > 0) { + num_priors += (fixed_ratios.size()) * (pow(densities[i], 2)); + } + } + } + + boxes->mutable_data(ctx.GetPlace()); + vars->mutable_data(ctx.GetPlace()); + auto e_boxes = framework::EigenTensor::From(*boxes).setConstant(0.0); + + int step_average = static_cast((step_width + step_height) * 0.5); + + for (int h = 0; h < feature_height; ++h) { + for (int w = 0; w < feature_width; ++w) { + T center_x = (w + offset) * step_width; + T center_y = (h + offset) * step_height; + int idx = 0; + // Generate density prior boxes with fixed sizes. + for (size_t s = 0; s < fixed_sizes.size(); ++s) { + auto fixed_size = fixed_sizes[s]; + int density = densities[s]; + // Generate density prior boxes with fixed ratios. + if (fixed_ratios.size() > 0) { + for (size_t r = 0; r < fixed_ratios.size(); ++r) { + float ar = fixed_ratios[r]; + int shift = step_average / density; + float box_width_ratio = fixed_size * sqrt(ar); + float box_height_ratio = fixed_size / sqrt(ar); + for (int di = 0; di < density; ++di) { + for (int dj = 0; dj < density; ++dj) { + float center_x_temp = + center_x - step_average / 2. + shift / 2. + dj * shift; + float center_y_temp = + center_y - step_average / 2. + shift / 2. + di * shift; + e_boxes(h, w, idx, 0) = + (center_x_temp - box_width_ratio / 2.) / img_width >= 0 + ? (center_x_temp - box_width_ratio / 2.) / img_width + : 0; + e_boxes(h, w, idx, 1) = + (center_y_temp - box_height_ratio / 2.) / img_height >= 0 + ? (center_y_temp - box_height_ratio / 2.) / img_height + : 0; + e_boxes(h, w, idx, 2) = + (center_x_temp + box_width_ratio / 2.) / img_width <= 1 + ? (center_x_temp + box_width_ratio / 2.) / img_width + : 1; + e_boxes(h, w, idx, 3) = + (center_y_temp + box_height_ratio / 2.) / img_height <= 1 + ? (center_y_temp + box_height_ratio / 2.) 
/ img_height + : 1; + idx++; + } + } + } + } + } + } + } + if (clip) { + platform::Transform trans; + ClipFunctor clip_func; + trans(ctx.template device_context(), + boxes->data(), boxes->data() + boxes->numel(), + boxes->data(), clip_func); + } + framework::Tensor var_t; + var_t.mutable_data( + framework::make_ddim({1, static_cast(variances.size())}), + ctx.GetPlace()); + + auto var_et = framework::EigenTensor::From(var_t); + + for (size_t i = 0; i < variances.size(); ++i) { + var_et(0, i) = variances[i]; + } + + int box_num = feature_height * feature_width * num_priors; + auto var_dim = vars->dims(); + vars->Resize({box_num, static_cast(variances.size())}); + + auto e_vars = framework::EigenMatrix::From(*vars); + + e_vars = var_et.broadcast(Eigen::DSizes(box_num, 1)); + + vars->Resize(var_dim); + } +}; // namespace operators + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/elementwise_op.h b/paddle/fluid/operators/elementwise_op.h index 5eb4233344e1c49e69dd9830178fd6fb2ae7e51c..f01f67692e1e5dd040971cb0dd1dd793648da97a 100644 --- a/paddle/fluid/operators/elementwise_op.h +++ b/paddle/fluid/operators/elementwise_op.h @@ -75,16 +75,12 @@ class ElementwiseOp : public framework::OperatorWithKernel { } }; -class ElementwiseOpInferVarType : public framework::VarTypeInference { - public: - void operator()(const framework::OpDesc &op_desc, - framework::BlockDesc *block) const override { - auto x_name = op_desc.Input("X")[0]; - auto out_name = op_desc.Output("Out")[0]; - auto &x = block->FindRecursiveOrCreateVar(x_name); - auto &out = block->FindRecursiveOrCreateVar(out_name); - out.SetType(x.GetType()); - out.SetDataType(x.GetDataType()); +class ElementwiseOpInferVarType + : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Out"}}; } }; diff --git a/paddle/fluid/operators/gather.cu.h b/paddle/fluid/operators/gather.cu.h index d74d4db92528d69492ab7b90a98d3775dadc35d1..e4df59c5d51c390cf593add0c5562665c91f33f6 100644 --- a/paddle/fluid/operators/gather.cu.h +++ b/paddle/fluid/operators/gather.cu.h @@ -50,7 +50,9 @@ void GPUGather(const platform::DeviceContext& ctx, const Tensor& src, const Tensor& index, Tensor* output) { // PADDLE_ENFORCE(platform::is_gpu_place(place)); // check index of shape 1-D - PADDLE_ENFORCE(index.dims().size() == 1); + PADDLE_ENFORCE(index.dims().size() == 1 || + (index.dims().size() == 2 && index.dims()[1] == 1)); + int index_size = index.dims()[0]; auto src_dims = src.dims(); diff --git a/paddle/fluid/operators/gather.h b/paddle/fluid/operators/gather.h index d72e07d76c97e9e455e54980207d7c02842cc04b..dc08ee5efacde5e232d751b13aaf11f51237634a 100644 --- a/paddle/fluid/operators/gather.h +++ b/paddle/fluid/operators/gather.h @@ -38,7 +38,8 @@ void CPUGather(const platform::DeviceContext& ctx, const Tensor& src, const Tensor& index, Tensor* output) { PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace())); // check index of shape 1-D - PADDLE_ENFORCE(index.dims().size() == 1); + PADDLE_ENFORCE(index.dims().size() == 1 || + (index.dims().size() == 2 && index.dims()[1] == 1)); int64_t index_size = index.dims()[0]; auto src_dims = src.dims(); diff --git a/paddle/fluid/operators/gather_op.cc b/paddle/fluid/operators/gather_op.cc index f84ff206fffddef1030b7ed439e887bdfef342a6..95aa9b573c795159079bdb5401b34d7a61252115 100644 --- a/paddle/fluid/operators/gather_op.cc +++ b/paddle/fluid/operators/gather_op.cc @@ -31,7 +31,8 @@ 
class GatherOp : public framework::OperatorWithKernel { "Output(Out) of GatherOp should not be null."); auto index_dims = ctx->GetInputDim("Index"); - PADDLE_ENFORCE(index_dims.size() == 1); + PADDLE_ENFORCE(index_dims.size() == 1 || + (index_dims.size() == 2 && index_dims[1] == 1)); int batch_size = ctx->GetInputDim("Index")[0]; framework::DDim output_dims(ctx->GetInputDim("X")); output_dims[0] = batch_size; @@ -53,6 +54,7 @@ class GatherGradOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); + ctx->ShareLoD("X", /*-->*/ framework::GradVarName("X")); } protected: @@ -75,7 +77,7 @@ Gather Operator. $Out = X[Index]$ -Out is obtained by gathering entries of the outer-most dimension +Out is obtained by gathering entries of the outer-most dimension of X indexed by Index and concatenate them together. Example: diff --git a/paddle/fluid/operators/mean_op.cc b/paddle/fluid/operators/mean_op.cc index 19426b3c204095bd415cebcd87cff18468acd564..820636defad0be9fb2e6decefc938658ae70ea9b 100644 --- a/paddle/fluid/operators/mean_op.cc +++ b/paddle/fluid/operators/mean_op.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/mean_op.h" - +#include namespace paddle { namespace operators { @@ -42,6 +42,14 @@ Mean Operator calculates the mean of all elements in X. } }; +class MeanOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Out"}}; + } +}; + class MeanGradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -50,6 +58,14 @@ class MeanGradOp : public framework::OperatorWithKernel { ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); ctx->ShareLoD("X", framework::GradVarName("X")); } + + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + auto input_data_type = + framework::ToDataType(ctx.Input("X")->type()); + + return framework::OpKernelType(input_data_type, ctx.GetPlace()); + } }; class MeanGradMaker : public framework::SingleGradOpDescMaker { @@ -71,7 +87,8 @@ class MeanGradMaker : public framework::SingleGradOpDescMaker { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OPERATOR(mean, ops::MeanOp, ops::MeanOpMaker, ops::MeanGradMaker); +REGISTER_OPERATOR(mean, ops::MeanOp, ops::MeanOpMaker, ops::MeanOpInferVarType, + ops::MeanGradMaker); REGISTER_OPERATOR(mean_grad, ops::MeanGradOp); REGISTER_OP_CPU_KERNEL( mean, ops::MeanKernel, diff --git a/paddle/fluid/operators/mul_op.cc b/paddle/fluid/operators/mul_op.cc index a2140ddc792a69e794a89c8056b1dd19a3661e38..08f2949d4a3774894912ae5251806b46e6240702 100644 --- a/paddle/fluid/operators/mul_op.cc +++ b/paddle/fluid/operators/mul_op.cc @@ -126,6 +126,14 @@ or not. But the output only shares the LoD information with input $X$. 
} }; +class MulOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Out"}}; + } +}; + class MulGradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -178,7 +186,8 @@ class MulOpGradMaker : public framework::SingleGradOpDescMaker { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OPERATOR(mul, ops::MulOp, ops::MulOpMaker, ops::MulOpGradMaker); +REGISTER_OPERATOR(mul, ops::MulOp, ops::MulOpMaker, ops::MulOpInferVarType, + ops::MulOpGradMaker); REGISTER_OPERATOR(mul_grad, ops::MulGradOp); REGISTER_OP_CPU_KERNEL( mul, ops::MulKernel, diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index 484cb65746612343fafc49fe61b607f2e919cf4f..46a95350a7293c18313811ba9b367fd65955145a 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -40,7 +40,7 @@ int PoolOutputSize(int input_size, int filter_size, int padding, int stride, return output_size; } -void PoolOp::InferShape(framework::InferShapeContext *ctx) const { +void PoolOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE(ctx->HasInput("X"), "X(Input) of Pooling should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), "Out(Output) of Pooling should not be null."); @@ -81,7 +81,7 @@ void PoolOp::InferShape(framework::InferShapeContext *ctx) const { } framework::OpKernelType PoolOp::GetExpectedKernelType( - const framework::ExecutionContext &ctx) const { + const framework::ExecutionContext& ctx) const { framework::LibraryType library_{framework::LibraryType::kPlain}; std::string data_format = ctx.Attr("data_format"); framework::DataLayout layout_ = framework::StringToDataLayout(data_format); @@ -104,7 +104,7 @@ framework::OpKernelType PoolOp::GetExpectedKernelType( layout_, library_); } -void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const { +void PoolOpGrad::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), "Input(X@GRAD) should not be null."); @@ -112,7 +112,7 @@ void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const { } framework::OpKernelType PoolOpGrad::GetExpectedKernelType( - const framework::ExecutionContext &ctx) const { + const framework::ExecutionContext& ctx) const { framework::LibraryType library_{framework::LibraryType::kPlain}; std::string data_format = ctx.Attr("data_format"); framework::DataLayout layout_ = framework::StringToDataLayout(data_format); @@ -262,6 +262,14 @@ Example: )DOC"); } +class PoolOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Out"}}; + } +}; + void Pool3dOpMaker::Make() { AddInput("X", "(Tensor) The input tensor of pooling operator. 
" @@ -372,6 +380,7 @@ Example: namespace ops = paddle::operators; REGISTER_OPERATOR(pool2d, ops::PoolOp, ops::Pool2dOpMaker, + ops::PoolOpInferVarType, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(pool2d_grad, ops::PoolOpGrad); @@ -383,6 +392,7 @@ REGISTER_OP_CPU_KERNEL( ops::PoolGradKernel); REGISTER_OPERATOR(pool3d, ops::PoolOp, ops::Pool3dOpMaker, + ops::PoolOpInferVarType, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(pool3d_grad, ops::PoolOpGrad); diff --git a/paddle/fluid/operators/reduce_max_op.cu b/paddle/fluid/operators/reduce_max_op.cu index 0d86b3127e42f7ee14ba57b1c762e8128a0f2d54..b21da178f3eeaafa41bde5f64cc4abcf7944b032 100644 --- a/paddle/fluid/operators/reduce_max_op.cu +++ b/paddle/fluid/operators/reduce_max_op.cu @@ -23,12 +23,3 @@ REGISTER_OP_CUDA_KERNEL(reduce_max, int, ops::MaxFunctor>, ops::ReduceKernel); -REGISTER_OP_CUDA_KERNEL( - reduce_max_grad, ops::ReduceGradKernel, - ops::ReduceGradKernel, - ops::ReduceGradKernel, - ops::ReduceGradKernel); diff --git a/paddle/fluid/operators/reduce_max_op.part.cu b/paddle/fluid/operators/reduce_max_op.part.cu new file mode 100644 index 0000000000000000000000000000000000000000..6954c8d744faee6f8f0b715d6e4c8e3bcda7fb83 --- /dev/null +++ b/paddle/fluid/operators/reduce_max_op.part.cu @@ -0,0 +1,25 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/operators/reduce_min_max_op.h" + +REGISTER_OP_CUDA_KERNEL( + reduce_max_grad, ops::ReduceGradKernel, + ops::ReduceGradKernel, + ops::ReduceGradKernel, + ops::ReduceGradKernel); diff --git a/paddle/fluid/operators/reduce_mean_op.cu b/paddle/fluid/operators/reduce_mean_op.cu index 59b30244839849d79e3e531953134633503c4090..4408200d2d052c2f68c2dd35619de6ed67f07f6e 100644 --- a/paddle/fluid/operators/reduce_mean_op.cu +++ b/paddle/fluid/operators/reduce_mean_op.cu @@ -69,13 +69,3 @@ REGISTER_OP_CUDA_KERNEL(reduce_mean, ops::ReduceMeanKernel, ops::ReduceMeanKernel, ops::ReduceMeanKernel, ops::ReduceMeanKernel); - -REGISTER_OP_CUDA_KERNEL( - reduce_mean_grad, ops::ReduceGradKernel, - ops::ReduceGradKernel, - ops::ReduceGradKernel, - ops::ReduceGradKernel); diff --git a/paddle/fluid/operators/reduce_mean_op.part.cu b/paddle/fluid/operators/reduce_mean_op.part.cu new file mode 100644 index 0000000000000000000000000000000000000000..4b663bcdca7c20f8802d962a362f429d8eafe9af --- /dev/null +++ b/paddle/fluid/operators/reduce_mean_op.part.cu @@ -0,0 +1,26 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// .part used to speed up nvcc compile +#include "paddle/fluid/operators/reduce_mean_op.h" + +REGISTER_OP_CUDA_KERNEL( + reduce_mean_grad, ops::ReduceGradKernel, + ops::ReduceGradKernel, + ops::ReduceGradKernel, + ops::ReduceGradKernel); diff --git a/paddle/fluid/operators/reduce_min_op.cu b/paddle/fluid/operators/reduce_min_op.cu index da466f805eff4709dc23471baef03e94052ee6c1..5a04a12b79444dcea30d3c1140d9708a98b55fe3 100644 --- a/paddle/fluid/operators/reduce_min_op.cu +++ b/paddle/fluid/operators/reduce_min_op.cu @@ -23,12 +23,3 @@ REGISTER_OP_CUDA_KERNEL(reduce_min, int, ops::MinFunctor>, ops::ReduceKernel); -REGISTER_OP_CUDA_KERNEL( - reduce_min_grad, ops::ReduceGradKernel, - ops::ReduceGradKernel, - ops::ReduceGradKernel, - ops::ReduceGradKernel); diff --git a/paddle/fluid/operators/reduce_min_op.part.cu b/paddle/fluid/operators/reduce_min_op.part.cu new file mode 100644 index 0000000000000000000000000000000000000000..5b8f061b2d03eb76863401905ac87044fd5ea778 --- /dev/null +++ b/paddle/fluid/operators/reduce_min_op.part.cu @@ -0,0 +1,25 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/operators/reduce_min_max_op.h" + +REGISTER_OP_CUDA_KERNEL( + reduce_min_grad, ops::ReduceGradKernel, + ops::ReduceGradKernel, + ops::ReduceGradKernel, + ops::ReduceGradKernel); diff --git a/paddle/fluid/operators/reduce_prod_op.cu b/paddle/fluid/operators/reduce_prod_op.cu index d62e677d92cffecf629d1684026b0c7bcfec29e3..d8692afb96e4d5d3206210060684dd12fb4d79a7 100644 --- a/paddle/fluid/operators/reduce_prod_op.cu +++ b/paddle/fluid/operators/reduce_prod_op.cu @@ -23,12 +23,3 @@ REGISTER_OP_CUDA_KERNEL(reduce_prod, int, ops::ProdFunctor>, ops::ReduceKernel); -REGISTER_OP_CUDA_KERNEL( - reduce_prod_grad, ops::ReduceGradKernel, - ops::ReduceGradKernel, - ops::ReduceGradKernel, - ops::ReduceGradKernel); diff --git a/paddle/fluid/operators/reduce_prod_op.part.cu b/paddle/fluid/operators/reduce_prod_op.part.cu new file mode 100644 index 0000000000000000000000000000000000000000..486c578c64b9a2d80abc940a7c4266ef5fd23c7f --- /dev/null +++ b/paddle/fluid/operators/reduce_prod_op.part.cu @@ -0,0 +1,25 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/operators/reduce_prod_op.h" + +REGISTER_OP_CUDA_KERNEL( + reduce_prod_grad, ops::ReduceGradKernel, + ops::ReduceGradKernel, + ops::ReduceGradKernel, + ops::ReduceGradKernel); diff --git a/paddle/fluid/operators/reduce_sum_op.cu b/paddle/fluid/operators/reduce_sum_op.cu index 53cd9e9419dd9aecee730917ae21d7a4ab332ffc..2b031e8df99768c9208146640bddbe51149b2614 100644 --- a/paddle/fluid/operators/reduce_sum_op.cu +++ b/paddle/fluid/operators/reduce_sum_op.cu @@ -64,13 +64,3 @@ class ReduceSumKernel : public framework::OpKernel { REGISTER_OP_CUDA_KERNEL(reduce_sum, ops::ReduceSumKernel, ops::ReduceSumKernel, ops::ReduceSumKernel, ops::ReduceSumKernel); - -REGISTER_OP_CUDA_KERNEL( - reduce_sum_grad, ops::ReduceGradKernel, - ops::ReduceGradKernel, - ops::ReduceGradKernel, - ops::ReduceGradKernel); diff --git a/paddle/fluid/operators/reduce_sum_op.part.cu b/paddle/fluid/operators/reduce_sum_op.part.cu new file mode 100644 index 0000000000000000000000000000000000000000..525633f62a95b2d0d677fcbebe551b75cb2a180d --- /dev/null +++ b/paddle/fluid/operators/reduce_sum_op.part.cu @@ -0,0 +1,26 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/operators/cub_reduce.h" +#include "paddle/fluid/operators/reduce_sum_op.h" + +REGISTER_OP_CUDA_KERNEL( + reduce_sum_grad, ops::ReduceGradKernel, + ops::ReduceGradKernel, + ops::ReduceGradKernel, + ops::ReduceGradKernel); diff --git a/paddle/fluid/operators/scatter.cu.h b/paddle/fluid/operators/scatter.cu.h index ac7d69bfb549fd98c76fcf834e8d3ad9bec2ef23..b2e79f6c82bb748293f4219845e6798347c8c46e 100644 --- a/paddle/fluid/operators/scatter.cu.h +++ b/paddle/fluid/operators/scatter.cu.h @@ -51,7 +51,8 @@ void GPUScatterAssign(const platform::DeviceContext& ctx, const Tensor& src, const Tensor& index, Tensor* output) { // PADDLE_ENFORCE(platform::is_gpu_place(place)); // check index of shape 1-D - PADDLE_ENFORCE(index.dims().size() == 1); + PADDLE_ENFORCE(index.dims().size() == 1 || + (index.dims().size() == 2 && index.dims()[1] == 1)); int index_size = index.dims()[0]; auto src_dims = src.dims(); diff --git a/paddle/fluid/operators/scatter.h b/paddle/fluid/operators/scatter.h index 39af717615c01f5c121e32b176b74d05be738531..8bae6606c94620ab4fa8ae34f69236e7e87e9670 100644 --- a/paddle/fluid/operators/scatter.h +++ b/paddle/fluid/operators/scatter.h @@ -37,7 +37,8 @@ void ScatterAssign(const platform::DeviceContext& ctx, const Tensor& src, const Tensor& index, Tensor* output) { PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace())); // check index of shape 1-D - PADDLE_ENFORCE(index.dims().size() == 1); + PADDLE_ENFORCE(index.dims().size() == 1 || + (index.dims().size() == 2 && index.dims()[1] == 1)); int index_size = index.dims()[0]; auto src_dims = src.dims(); diff --git a/paddle/fluid/operators/similarity_focus_op.cc b/paddle/fluid/operators/similarity_focus_op.cc new file mode 100644 index 
0000000000000000000000000000000000000000..9612f82b6d45dc4e08bfe288ddd1c7790875ee4d --- /dev/null +++ b/paddle/fluid/operators/similarity_focus_op.cc @@ -0,0 +1,87 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/similarity_focus_op.h" + +namespace paddle { +namespace operators { +class SimilarityFocusOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", + "(Tensor, default Tensor), a 4-D tensor with shape," + " [BatchSize, X, Y, Z]"); + AddOutput("Out", + "(Tensor, default Tensor), the similarity focus mask" + " with the same shape of input X."); + AddAttr("axis", + "(int32), indicating the dimension to be select. It can" + " only be 1, 2, or 3."); + AddAttr>("indexes", + "(std::vector), indicating the indexes" + " of the selected dimension."); + AddComment(R"DOC( +SimilarityFocus Operator. + +Generate a similarity focus mask with the same shape of input using the following method: +1. Extract the 3-D tensor(here the first dimension is BatchSize) corresponding + to the axis according to the indexes. For example, if axis=1 and indexes=[a], + it will get the matrix T=X[:, a, :, :]. In this case, if the shape of input X + is (BatchSize, A, B, C), the shape of tensor T is (BatchSize, B, C). +2. For each index, find the largest numbers in the tensor T, so that the same + row and same column has at most one number(what it means is that if the + largest number has been found in the i-th row and the j-th column, then + the numbers in the i-th row or j-th column will be skipped. And then the + next largest number will be selected from the remaining numbers. Obviously + there will be min(B, C) numbers), and mark the corresponding position of the + 3-D similarity focus mask as 1, otherwise as 0. Do elementwise-or for + each index. +3. Broadcast the 3-D similarity focus mask to the same shape of input X. 
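The greedy row/column selection in step 2 is easier to follow in code. Below is a minimal NumPy sketch of the mask construction for axis=1 and a single index; the function name, shapes, and variable names are illustrative only and are not part of the operator:

    import numpy as np

    def similarity_focus_mask(x, index):
        # x has shape (batch, A, B, C); build the mask for axis=1 and one index.
        batch, A, B, C = x.shape
        mask = np.zeros_like(x)
        for i in range(batch):
            t = x[i, index]                    # step 1: take the (B, C) slice
            order = np.argsort(-t, axis=None)  # step 2: visit entries largest first
            rows, cols = set(), set()
            for flat in order:
                r, c = divmod(int(flat), C)
                if r in rows or c in cols:     # this row or column is already marked
                    continue
                rows.add(r)
                cols.add(c)
                mask[i, :, r, c] = 1           # step 3: broadcast along the selected axis
                if len(rows) == min(B, C):     # at most min(B, C) positions get marked
                    break
        return mask

For several indexes, the per-index masks are combined with an elementwise or, as described above.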
+ +Refer to `Similarity Focus Layer `_ +)DOC"); + } +}; + +class SimilarityFocusOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should be not null."); + auto x_dims = ctx->GetInputDim("X"); + PADDLE_ENFORCE_EQ(x_dims.size(), 4, "Input(X)'s rank should be 4."); + ctx->SetOutputDim("Out", x_dims); + ctx->ShareLoD("X", /*->*/ "Out"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + platform::CPUPlace()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(similarity_focus, ops::SimilarityFocusOp, + ops::SimilarityFocusOpMaker, + paddle::framework::EmptyGradOpMaker); +REGISTER_OP_CPU_KERNEL(similarity_focus, ops::SimilarityFocusKernel, + ops::SimilarityFocusKernel); diff --git a/paddle/fluid/operators/similarity_focus_op.h b/paddle/fluid/operators/similarity_focus_op.h new file mode 100644 index 0000000000000000000000000000000000000000..bf3fed2aaf2cf92d5619ae5bce6dd70d9dfe9621 --- /dev/null +++ b/paddle/fluid/operators/similarity_focus_op.h @@ -0,0 +1,168 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include +#include +#include +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { +using Tensor = framework::Tensor; + +template +class SimilarityFocusKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + Tensor* out = context.Output("Out"); + const Tensor* x = context.Input("X"); + T* out_data = out->mutable_data(context.GetPlace()); + const T* x_data = x->data(); + + int axis = context.Attr("axis"); + std::vector indexes = context.Attr>("indexes"); + + int64_t batch_size = x->dims()[0]; + int64_t dim[4]; + for (int i = 1; i <= 3; ++i) { + dim[i] = x->dims()[i]; + } + + if (indexes.size() < 1) { + PADDLE_THROW("Indexes' size can not be 0."); + } + for (auto index : indexes) { + if (dim[axis] < index) { + PADDLE_THROW("Index exceeds tensor shape limit."); + } + } + + int64_t array_size = 1; + for (int i = 1; i <= 3; ++i) { + if (i != axis) { + array_size *= dim[i]; + } + } + + std::vector> array(array_size); + + bool (*cmp)(std::pair, std::pair) = []( + std::pair x, std::pair y) { + return x.first > y.first; + }; + + int64_t (*compute_index)(int64_t*, int, int, int, int) = []( + int64_t* dim, int d1, int d2, int d3, int d4) { + return d1 * dim[1] * dim[2] * dim[3] + d2 * dim[2] * dim[3] + + d3 * dim[3] + d4; + }; + + memset(out_data, 0, sizeof(T) * batch_size * dim[1] * dim[2] * dim[3]); + for (int i = 0; i < batch_size; ++i) { + for (auto index : indexes) { + if (axis == 1) { + for (int j = 0; j < dim[2]; ++j) { + for (int k = 0; k < dim[3]; ++k) { + array[j * dim[3] + k] = std::make_pair( + x_data[compute_index(dim, i, index, j, k)], j * dim[3] + k); + } + } + + std::sort(array.begin(), array.end(), cmp); + int tag_num = 0; + std::vector tag2(dim[2]), tag3(dim[3]); + for (auto x : array) { + int idx2 = x.second / dim[3]; + int idx3 = x.second % dim[3]; + if (tag2[idx2] || tag3[idx3]) { + continue; + } + tag_num++; + tag2[idx2] = true; + tag3[idx3] = true; + for (int j = 0; j < dim[1]; ++j) { + out_data[compute_index(dim, i, j, idx2, idx3)] = 1; + } + if (tag_num == std::min(dim[2], dim[3])) { + break; + } + } + } else if (axis == 2) { + for (int j = 0; j < dim[1]; ++j) { + for (int k = 0; k < dim[3]; ++k) { + array[j * dim[3] + k] = std::make_pair( + x_data[compute_index(dim, i, j, index, k)], j * dim[3] + k); + } + } + + std::sort(array.begin(), array.end(), cmp); + int tag_num = 0; + std::vector tag1(dim[1]), tag3(dim[3]); + for (auto x : array) { + int idx1 = x.second / dim[3]; + int idx3 = x.second % dim[3]; + if (tag1[idx1] || tag3[idx3]) { + continue; + } + tag_num++; + tag1[idx1] = true; + tag3[idx3] = true; + for (int j = 0; j < dim[2]; ++j) { + out_data[compute_index(dim, i, idx1, j, idx3)] = 1; + } + if (tag_num == std::min(dim[1], dim[3])) { + break; + } + } + } else if (axis == 3) { + for (int j = 0; j < dim[1]; ++j) { + for (int k = 0; k < dim[2]; ++k) { + array[j * dim[2] + k] = std::make_pair( + x_data[compute_index(dim, i, j, k, index)], j * dim[2] + k); + } + } + + std::sort(array.begin(), array.end(), cmp); + int tag_num = 0; + std::vector tag1(dim[1]), tag2(dim[2]); + for (auto x : array) { + int idx1 = x.second / dim[2]; + int idx2 = x.second % dim[2]; + if (tag1[idx1] || tag2[idx2]) { + continue; + } + tag_num++; + tag1[idx1] = true; + tag2[idx2] = true; + for (int j = 0; j < dim[3]; ++j) { + out_data[compute_index(dim, i, idx1, idx2, j)] = 1; + } + if (tag_num == std::min(dim[1], 
dim[2])) { + break; + } + } + } else { + PADDLE_THROW("Axis must be 1 or 2 or 3"); + } + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc index a4bdbe6648afa7c91a056af4737bb5d826229022..9e21b6c824bfd7d1c1090e5ba3ba2f6aa9bdb230 100644 --- a/paddle/fluid/operators/softmax_op.cc +++ b/paddle/fluid/operators/softmax_op.cc @@ -124,6 +124,14 @@ For each row $i$ and each column $j$ in the matrix, we have: } }; +class SoftmaxOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Out"}}; + } +}; + class SoftmaxOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -196,7 +204,7 @@ class SoftmaxOpGradMaker : public framework::SingleGradOpDescMaker { namespace ops = paddle::operators; REGISTER_OPERATOR(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker, - ops::SoftmaxOpGradMaker); + ops::SoftmaxOpInferVarType, ops::SoftmaxOpGradMaker); REGISTER_OPERATOR(softmax_grad, ops::SoftmaxOpGrad); REGISTER_OP_CPU_KERNEL( softmax, ops::SoftmaxKernel, diff --git a/paddle/fluid/operators/stack_op.cc b/paddle/fluid/operators/stack_op.cc index 3f4b48bc7391def082c82ed451fc5a752009a2f1..9345b495415d203728238c19621a20f446c40bf5 100644 --- a/paddle/fluid/operators/stack_op.cc +++ b/paddle/fluid/operators/stack_op.cc @@ -21,8 +21,12 @@ REGISTER_OPERATOR(stack, ops::StackOp, ops::StackOpMaker, REGISTER_OPERATOR(stack_grad, ops::StackOpGrad); REGISTER_OP_CPU_KERNEL(stack, ops::StackKernel, - ops::StackKernel); + ops::StackKernel, + ops::StackKernel, + ops::StackKernel); REGISTER_OP_CPU_KERNEL(stack_grad, ops::StackGradKernel, - ops::StackGradKernel); + ops::StackGradKernel, + ops::StackGradKernel, + ops::StackGradKernel); diff --git a/paddle/fluid/operators/stack_op.cu b/paddle/fluid/operators/stack_op.cu index 92c1bde2bcf089e5c715e90e564408e6ad37ba17..bf2a9e5b3d22996e688621727cb280dc9aed7859 100644 --- a/paddle/fluid/operators/stack_op.cu +++ b/paddle/fluid/operators/stack_op.cu @@ -18,8 +18,12 @@ namespace plat = paddle::platform; namespace ops = paddle::operators; REGISTER_OP_CUDA_KERNEL(stack, ops::StackKernel, - ops::StackKernel); + ops::StackKernel, + ops::StackKernel, + ops::StackKernel); REGISTER_OP_CUDA_KERNEL(stack_grad, ops::StackGradKernel, - ops::StackGradKernel); + ops::StackGradKernel, + ops::StackGradKernel, + ops::StackGradKernel); diff --git a/paddle/fluid/operators/tensor_array_to_tensor_op.cc b/paddle/fluid/operators/tensor_array_to_tensor_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..96dc123f6a36e1a2b6ae04e0d97dffe1e10ac4ea --- /dev/null +++ b/paddle/fluid/operators/tensor_array_to_tensor_op.cc @@ -0,0 +1,246 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include + +#include "paddle/fluid/framework/lod_tensor_array.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/variable.h" + +namespace paddle { +namespace operators { +using framework::Tensor; + +void LodTensorArray2LodTensorVector(const framework::Scope &scope, + const std::string &base_name, + const std::string &lod_tensor_array_name, + std::vector *res_names) { + auto &inx = + scope.FindVar(lod_tensor_array_name)->Get(); + for (size_t i = 0; i < inx.size(); i++) { + std::string var_name = base_name + std::to_string(i); + framework::Variable *g_feed_value = + const_cast(scope).Var(var_name); + auto &feed_input = + *(g_feed_value->GetMutable()); + feed_input.ShareDataWith(inx[i]); + res_names->push_back(var_name); + } +} + +void LodTensorVectorResizeFromLodTensorArray( + const framework::Scope &scope, const std::string &base_name, + const std::string &lod_tensor_array_name, + std::vector *res_names) { + auto &inx = + scope.FindVar(lod_tensor_array_name)->Get(); + for (size_t i = 0; i < inx.size(); i++) { + std::string var_name = base_name + std::to_string(i); + framework::Variable *g_feed_value = + const_cast(scope).Var(var_name); + auto &feed_input = + *(g_feed_value->GetMutable()); + auto dims = inx[i].dims(); + feed_input.Resize(dims); + res_names->push_back(var_name); + } +} + +void LodTensorArrayCreateFromLodTensorArray( + const framework::Scope &scope, + const std::string &input_lod_tensor_array_name, + const std::string &output_lod_tensor_array_name) { + auto &inx = scope.FindVar(input_lod_tensor_array_name) + ->Get(); + auto &grad_inx = *scope.FindVar(output_lod_tensor_array_name) + ->GetMutable(); + + for (size_t i = 0; i < inx.size(); i++) { + std::string var_name = output_lod_tensor_array_name + std::to_string(i); + framework::Variable *g_feed_value = + const_cast(scope).Var(var_name); + auto &feed_input = + *(g_feed_value->GetMutable()); + grad_inx.push_back(feed_input); + } +} + +class LoDTensorArray2TensorOp : public framework::OperatorBase { + public: + using OperatorBase::OperatorBase; + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { + auto axis = Attr("axis"); + + framework::AttributeMap attrs; + attrs["axis"] = axis; + + auto &inx = scope.FindVar(Input("X"))->Get(); + auto &out = + *scope.FindVar(Output("Out"))->GetMutable(); + auto &out_inx = + *scope.FindVar(Output("OutIndex"))->GetMutable(); + + const size_t n = inx.size(); + PADDLE_ENFORCE_GT(n, 0, "Input tensorarray size should > 0."); + + std::string base_name = Inputs("X")[0]; + std::vector names; + + // get the input tensorarray items' dim in out_inx + auto out_inx_dim = out_inx.dims(); + out_inx_dim[0] = inx.size(); + out_inx.Resize(out_inx_dim); + + std::string var_name = "out_index"; + framework::Variable *tmp_index_var = + const_cast(scope).Var(var_name); + auto &tmp_index_tensor = + *(tmp_index_var->GetMutable()); + tmp_index_tensor.Resize(out_inx_dim); + int *tmp_index_data = + tmp_index_tensor.mutable_data(platform::CPUPlace()); + + auto out_dims = inx[0].dims(); + size_t out_dim_sum = 0; + for (size_t index = 0; index < inx.size(); index++) { + auto inx_dims = inx[index].dims(); + out_dim_sum += inx_dims[axis]; + tmp_index_data[index] = inx_dims[axis]; + } + out_inx.ShareDataWith(tmp_index_tensor); + + // get input array items' dims + out_dims[axis] = out_dim_sum; + out.Resize(out_dims); + + LodTensorArray2LodTensorVector(scope, base_name, Input("X"), &names); + // Invoke Reshape Op + auto 
concat_op = framework::OpRegistry::CreateOp( + "concat", {{"X", names}}, {{"Out", {Output("Out")}}}, attrs); + + concat_op->Run(scope, place); + } +}; + +class LoDTensorArray2TensorOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "Input LoDTensorArray of tensor_array_to_tensor operator."); + AddOutput("Out", "Output tensor of tensor_array_to_tensor operator."); + AddOutput("OutIndex", + "Output input LoDTensorArray items' dims of " + "tensor_array_to_tensor operator."); + AddAttr("axis", + "The axis along which the input tensors will be concatenated.") + .SetDefault(0); + AddComment(R"DOC( +tensor_array_to_tensor Operator. + +Concatenate the input LoDTensorArray along dimension axis to the output Tensor. +Examples: + Input = {[1,2], [3,4], [5,6]} + axis = 0 + Output = [[1,2], + [3,4], + [5,6]] + OutputIndex = [1,1,1] + +)DOC"); + } +}; + +class LoDTensorArray2TensorOpInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *ctx) const override {} +}; + +class LoDTensorArray2TensorGradInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override {} +}; + +class LoDTensorArray2TensorGradInferVarType + : public framework::VarTypeInference { + public: + void operator()(const framework::OpDesc &op_desc, + framework::BlockDesc *block) const override { + for (auto &out_var : op_desc.Output(framework::GradVarName("X"))) { + block->Var(out_var)->SetType(framework::proto::VarType::LOD_TENSOR_ARRAY); + } + } +}; + +class LoDTensorArray2TensorGradOp : public framework::OperatorBase { + public: + using OperatorBase::OperatorBase; + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { + auto axis = Attr("axis"); + framework::AttributeMap attrs; + attrs["axis"] = axis; + + auto &inx = scope.FindVar(Input("X"))->Get(); + const size_t n = inx.size(); + PADDLE_ENFORCE_GT(n, 0, "Input tensorarray size should > 0."); + + std::string base_name = Inputs("X")[0]; + std::vector names; + + LodTensorArray2LodTensorVector(scope, base_name, Input("X"), &names); + + // grad + auto dx_name = Output(framework::GradVarName("X")); + auto dout_name = Input(framework::GradVarName("Out")); + + std::vector grad_names; + + LodTensorVectorResizeFromLodTensorArray(scope, "grad_name", Input("X"), + &grad_names); + + auto concat_grad_op = framework::OpRegistry::CreateOp( + "concat_grad", {{"X", names}, {"Out@GRAD", {dout_name}}}, + {{"X@GRAD", grad_names}}, attrs); + + concat_grad_op->Run(scope, place); + + LodTensorArrayCreateFromLodTensorArray(scope, Input("X"), dx_name); + auto &grad_inx = + *scope.FindVar(dx_name)->GetMutable(); + + for (size_t i = 0; i < grad_names.size(); i++) { + std::string var_name = grad_names[i]; + auto &feed_input = scope.FindVar(var_name)->Get(); + grad_inx[i].ShareDataWith(feed_input); + } + } +}; + +} // namespace operators +} // namespace paddle +USE_OP(concat); + +namespace ops = paddle::operators; +REGISTER_OPERATOR(tensor_array_to_tensor, ops::LoDTensorArray2TensorOp, + ops::LoDTensorArray2TensorOpMaker, + ops::LoDTensorArray2TensorOpInferShape, + paddle::framework::DefaultGradOpDescMaker); +REGISTER_OPERATOR(tensor_array_to_tensor_grad, ops::LoDTensorArray2TensorGradOp, + ops::LoDTensorArray2TensorGradInferShape, + ops::LoDTensorArray2TensorGradInferVarType); diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 
595af0c29a5c2c30427a82205d3d56c49492c0c8..a51c9becd416af243cb473c8856141db8d9f3bf0 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -139,6 +139,7 @@ function cmake_gen() { -DWITH_AMD_GPU=${WITH_AMD_GPU:-OFF} -DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF} -DWITH_MKL=${WITH_MKL:-ON} + -DWITH_NGRAPH=${WITH_NGRAPH:-OFF} -DWITH_AVX=${WITH_AVX:-OFF} -DWITH_GOLANG=${WITH_GOLANG:-OFF} -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} @@ -171,6 +172,7 @@ EOF -DWITH_AMD_GPU=${WITH_AMD_GPU:-OFF} \ -DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF} \ -DWITH_MKL=${WITH_MKL:-ON} \ + -DWITH_NGRAPH=${WITH_NGRAPH:-OFF} \ -DWITH_AVX=${WITH_AVX:-OFF} \ -DWITH_GOLANG=${WITH_GOLANG:-OFF} \ -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \ diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index b79b00846ecd21398bb5b546e757694932f772c2..0b997009bffebb6c7a02c16b643c9d6c49af103b 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -34,6 +34,7 @@ from . import regularizer from . import average from . import metrics from . import transpiler +from . import distribute_lookup_table from .param_attr import ParamAttr, WeightNormParamAttr from .data_feeder import DataFeeder from .core import LoDTensor, LoDTensorArray, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope diff --git a/python/paddle/fluid/distribute_lookup_table.py b/python/paddle/fluid/distribute_lookup_table.py new file mode 100644 index 0000000000000000000000000000000000000000..52d9ce75f8d73eb3c3e8683bc0793e9dd8fbe48d --- /dev/null +++ b/python/paddle/fluid/distribute_lookup_table.py @@ -0,0 +1,39 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +LOOKUP_TABLE_TYPE = "lookup_table" + + +def find_distributed_lookup_table(program): + """ + Find distribute lookup table in program. + We only support one distribute table now. 
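+    For example, if the global block contains a lookup_table op with
+    is_distributed=True whose input "W" is a parameter named "emb.w_0"
+    (an illustrative name), this returns "emb.w_0"; if no such op exists,
+    None is returned.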
+ :param program: + :return: table_name or None + """ + table_name = None + + for op in program.global_block().ops: + if op.type == LOOKUP_TABLE_TYPE: + if op.attr('is_distributed') is True: + if table_name is None: + table_name = op.input("W")[0] + if table_name != op.input("W")[0]: + raise RuntimeError("all distributed lookup_table_ops" + " should have only one table") + else: + if table_name is not None: + assert op.input("W")[0] != table_name + + return table_name diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 4ac94981a7a47530fe6ae4d968212c62dd3e0a93..96b6705e26c0f8d8d223e9020192a8f330c2c727 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -31,6 +31,7 @@ from functools import reduce __all__ = [ 'prior_box', + 'density_prior_box', 'multi_box_head', 'bipartite_match', 'target_assign', @@ -1023,6 +1024,135 @@ def prior_box(input, return box, var +def density_prior_box(input, + image, + densities=None, + fixed_sizes=None, + fixed_ratios=None, + variance=[0.1, 0.1, 0.2, 0.2], + clip=False, + steps=[0.0, 0.0], + offset=0.5, + name=None): + """ + **Density Prior Box Operator** + + Generate density prior boxes for SSD(Single Shot MultiBox Detector) + algorithm. Each position of the input produce N prior boxes, N is + determined by the count of densities, fixed_sizes and fixed_ratios. + Boxes center at grid points around each input position is generated by + this operator, and the grid points is determined by densities and + the count of density prior box is determined by fixed_sizes and fixed_ratios. + Obviously, the number of fixed_sizes is equal to the number of densities. + For densities_i in densities: + N_density_prior_box =sum(N_fixed_ratios * densities_i^2), + + Args: + input(Variable): The Input Variables, the format is NCHW. + image(Variable): The input image data of PriorBoxOp, + the layout is NCHW. + densities(list|tuple|None): the densities of generated density prior + boxes, this attribute should be a list or tuple of integers. + Default: None. + fixed_sizes(list|tuple|None): the fixed sizes of generated density + prior boxes, this attribute should a list or tuple of same + length with :attr:`densities`. Default: None. + fixed_ratios(list|tuple|None): the fixed ratios of generated density + prior boxes, if this attribute is not set and :attr:`densities` + and :attr:`fix_sizes` is set, :attr:`aspect_ratios` will be used + to generate density prior boxes. + variance(list|tuple): the variances to be encoded in density prior boxes. + Default:[0.1, 0.1, 0.2, 0.2]. + clip(bool): Whether to clip out-of-boundary boxes. Default: False. + step(list|turple): Prior boxes step across width and height, If + step[0] == 0.0/step[1] == 0.0, the density prior boxes step across + height/weight of the input will be automatically calculated. + Default: [0., 0.] + offset(float): Prior boxes center offset. Default: 0.5 + name(str): Name of the density prior box op. Default: None. + + Returns: + tuple: A tuple with two Variable (boxes, variances) + + boxes: the output density prior boxes of PriorBox. + The layout is [H, W, num_priors, 4]. + H is the height of input, W is the width of input, + num_priors is the total + box count of each position of input. + + variances: the expanded variances of PriorBox. + The layout is [H, W, num_priors, 4]. + H is the height of input, W is the width of input + num_priors is the total + box count of each position of input + + + Examples: + .. 
code-block:: python + + box, var = fluid.layers.density_prior_box( + input=conv1, + image=images, + min_sizes=[100.], + max_sizes=[200.], + aspect_ratios=[1.0, 1.0 / 2.0, 2.0], + densities=[3, 4], + fixed_sizes=[50., 60.], + fixed_ratios=[1.0, 3.0, 1.0 / 3.0], + flip=True, + clip=True) + """ + helper = LayerHelper("density_prior_box", **locals()) + dtype = helper.input_dtype() + + def _is_list_or_tuple_(data): + return (isinstance(data, list) or isinstance(data, tuple)) + + if not _is_list_or_tuple_(densities): + raise TypeError('densities should be a list or a tuple or None.') + if not _is_list_or_tuple_(fixed_sizes): + raise TypeError('fixed_sizes should be a list or a tuple or None.') + if not _is_list_or_tuple_(fixed_ratios): + raise TypeError('fixed_ratios should be a list or a tuple or None.') + if len(densities) != len(fixed_sizes): + raise ValueError('densities and fixed_sizes length should be euqal.') + if not (_is_list_or_tuple_(steps) and len(steps) == 2): + raise ValueError('steps should be a list or tuple ', + 'with length 2, (step_width, step_height).') + + densities = list(map(int, densities)) + fixed_sizes = list(map(float, fixed_sizes)) + fixed_ratios = list(map(float, fixed_ratios)) + steps = list(map(float, steps)) + + attrs = { + 'variances': variance, + 'clip': clip, + 'step_w': steps[0], + 'step_h': steps[1], + 'offset': offset, + } + if densities is not None and len(densities) > 0: + attrs['densities'] = densities + if fixed_sizes is not None and len(fixed_sizes) > 0: + attrs['fixed_sizes'] = fixed_sizes + if fixed_ratios is not None and len(fixed_ratios) > 0: + attrs['fixed_ratios'] = fixed_ratios + + box = helper.create_variable_for_type_inference(dtype) + var = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type="density_prior_box", + inputs={"Input": input, + "Image": image}, + outputs={"Boxes": box, + "Variances": var}, + attrs=attrs, ) + box.stop_gradient = True + var.stop_gradient = True + return box, var + + def multi_box_head(inputs, image, base_size, diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 0a39587574c06a093b9f9e407bb82fc28612ff0d..625dee474abe27eb167d3e36eb5b5dce5e46d351 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -160,10 +160,12 @@ __all__ = [ 'affine_grid', 'sequence_reverse', 'affine_channel', + 'similarity_focus', 'hash', 'grid_sampler', 'log_loss', 'add_position_encoding', + 'bilinear_tensor_product', ] @@ -4065,8 +4067,8 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None): Examples: .. code-block:: python - x = fluid.layers.data(name='x', shape=[8], dtype='float32') - y = fluid.layers.data(name='y', shape=[7], dtype='float32') + x = fluid.layers.data(name='x', shape=[1], dtype='float32') + y = fluid.layers.data(name='y', shape=[1], dtype='float32') cost = fluid.layers.edit_distance(input=x,label=y) """ helper = LayerHelper("edit_distance", **locals()) @@ -4740,7 +4742,8 @@ def softmax_with_cross_entropy(logits, label, soft_label=False, ignore_index=-100, - numeric_stable_mode=False): + numeric_stable_mode=False, + return_softmax=False): """ **Softmax With Cross Entropy Operator.** @@ -4804,9 +4807,15 @@ def softmax_with_cross_entropy(logits, the algorithm is always numerically stable. Note that the speed may be slower when use stable algorithm. Default: False + return_softmax (bool): A flag indicating whether to return the softmax + along with the cross entropy loss. 
Default: False Returns: - Variable: The cross entropy loss is a 2-D tensor with shape [N x 1]. + Variable or Tuple of two Variables: Return the cross entropy loss if + `return_softmax` is False, otherwise the tuple + (loss, softmax), where the cross entropy loss is + a 2-D tensor with shape [N x 1], and softmax is a + 2-D tensor with shape [N x K]. Examples: .. code-block:: python @@ -4831,6 +4840,10 @@ def softmax_with_cross_entropy(logits, 'ignore_index': ignore_index, 'numeric_stable_mode': numeric_stable_mode }) + + if return_softmax: + return loss, softmax + return loss @@ -7933,6 +7946,118 @@ def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None): return out +def similarity_focus(input, axis, indexes, name=None): + """ + SimilarityFocus Operator + + Generate a similarity focus mask with the same shape of input using the following method: + 1. Extract the 3-D tensor(here the first dimension is BatchSize) corresponding + to the axis according to the indexes. For example, if axis=1 and indexes=[a], + it will get the matrix T=X[:, a, :, :]. In this case, if the shape of input X + is (BatchSize, A, B, C), the shape of tensor T is (BatchSize, B, C). + 2. For each index, find the largest numbers in the tensor T, so that the same + row and same column has at most one number(what it means is that if the + largest number has been found in the i-th row and the j-th column, then + the numbers in the i-th row or j-th column will be skipped. And then the + next largest number will be selected from the remaining numbers. Obviously + there will be min(B, C) numbers), and mark the corresponding position of the + 3-D similarity focus mask as 1, otherwise as 0. Do elementwise-or for + each index. + 3. Broadcast the 3-D similarity focus mask to the same shape of input X. + + Refer to `Similarity Focus Layer `_ + + .. code-block:: text + + * Example : + + Given a 4-D tensor x with the shape (BatchSize, C, A, B), where C is + the number of channels and the shape of feature map is (A, B): + x.shape = (2, 3, 2, 2) + x.data = [[[[0.8, 0.1], + [0.4, 0.5]], + + [[0.9, 0.7], + [0.9, 0.9]], + + [[0.8, 0.9], + [0.1, 0.2]]], + + + [[[0.2, 0.5], + [0.3, 0.4]], + + [[0.9, 0.7], + [0.8, 0.4]], + + [[0.0, 0.2], + [0.4, 0.7]]]] + + Given axis: 1 (the axis of the channel) + Given indexes: [0] + + then we get a 4-D tensor out with the same shape of input x: + out.shape = (2, 3, 2, 2) + out.data = [[[[1.0, 0.0], + [0.0, 1.0]], + + [[1.0, 0.0], + [0.0, 1.0]], + + [[1.0, 0.0], + [0.0, 1.0]]], + + [[[0.0, 1.0], + [1.0, 0.0]], + + [[0.0, 1.0], + [1.0, 0.0]], + + [[0.0, 1.0], + [1.0, 0.0]]]] + + Args: + input(Variable): The input tensor variable(default float). It should + be a 4-D tensor with shape [BatchSize, A, B, C]. + axis(int): Indicating the dimension to be selected. It can only be + 1, 2 or 3. + indexes(list): Indicating the indexes of the selected dimension. + + Returns: + Variable: A tensor variable with the same shape and same type + as the input. + + Examples: + .. 
code-block:: python + data = fluid.layers.data( + name='data', shape=[2, 3, 2, 2], dtype='float32') + x = fluid.layers.layer_norm(input=data, axis=1, indexes=[0]) + """ + helper = LayerHelper('similarity_focus', **locals()) + # check attrs + if isinstance(axis, int) is False: + raise TypeError("axis must be int type.") + if isinstance(indexes, list) is False: + raise TypeError("indexes must be list type.") + if axis != 1 and axis != 2 and axis != 3: + raise ValueError("axis must be 1, 2 or 3.") + if len(indexes) == 0: + raise ValueError("indexes can not be empty.") + + if name is None: + out = helper.create_variable_for_type_inference(dtype=input.dtype) + else: + out = helper.create_variable( + name=name, dtype=input.dtype, persistable=False) + helper.append_op( + type='similarity_focus', + inputs={'X': input}, + outputs={'Out': out}, + attrs={"axis": axis, + "indexes": indexes}) + return out + + def hash(input, hash_size, num_hash=1, name=None): """ Hash the input to an integer whose value is less than the given hash size. @@ -8176,3 +8301,72 @@ def add_position_encoding(input, alpha, beta, name=None): attrs={"alpha": alpha, "beta": beta}) return out + + +def bilinear_tensor_product(x, + y, + size, + act=None, + name=None, + param_attr=None, + bias_attr=None): + """ + **Add Bilinear Tensor Product Layer** + + This layer performs bilinear tensor product on two inputs. + For example: + + .. math:: + out{i} = x * W_{i} * {y^\mathrm{T}}, i=0,1,...,size-1 + + In this formula: + - :math:`x`: the first input contains M elements, shape is [batch_size, M]. + - :math:`y`: the second input contains N elements, shape is [batch_size, N]. + - :math:`W_{i}`: the i-th learned weight, shape is [M, N] + - :math:`out{i}`: the i-th element of out, shape is [batch_size, size]. + - :math:`y^\mathrm{T}`: the transpose of :math:`y_{2}`. + + Args: + x (Variable): 2-D input tensor with shape [batch_size, M] + y (Variable): 2-D input tensor with shape [batch_size, N] + size (int): The dimension of this layer. + act (str, default None): Activation to be applied to the output of this layer. + name (str, default None): The name of this layer. + param_attr (ParamAttr, default None): The parameter attribute for the learnable w. + parameters/weights of this layer. + bias_attr (ParamAttr, default None): The parameter attribute for the bias + of this layer. If it is set to False, no bias will be added to the output units. + If it is set to None, the bias is initialized zero. Default: None. + + Returns: + Variable: A 2-D Tensor of shape [batch_size, size]. + + Examples: + .. 
code-block:: python + + tensor = bilinear_tensor_product(x=layer1, y=layer2, size=1000) + """ + helper = LayerHelper('bilinear_tensor_product', **locals()) + dtype = helper.input_dtype('x') + + param_shape = [size, x.shape[1], y.shape[1]] + + w = helper.create_parameter( + attr=helper.param_attr, shape=param_shape, dtype=dtype, is_bias=False) + + if name is None: + out = helper.create_variable_for_type_inference(dtype=dtype) + else: + out = helper.create_variable(name=name, dtype=dtype, persistable=False) + + inputs = {"X": x, "Y": y, "Weight": w} + if helper.bias_attr: + bias_size = [1, size] + bias = helper.create_parameter( + attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True) + inputs["Bias"] = bias + helper.append_op( + type="bilinear_tensor_product", inputs=inputs, outputs={"Out": out}) + + # add activation + return helper.append_activation(out) diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 09a7cb8dc9339afa666f8cf09e92a27ffba8a9b3..ff32c00104171bf42c00be33f05758a4387228e1 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -24,10 +24,10 @@ from .layer_function_generator import templatedoc import numpy __all__ = [ - 'create_tensor', 'create_parameter', 'create_global_var', 'cast', 'concat', - 'sums', 'assign', 'fill_constant_batch_size_like', 'fill_constant', - 'argmin', 'argmax', 'argsort', 'ones', 'zeros', 'reverse', 'has_inf', - 'has_nan', 'isfinite' + 'create_tensor', 'create_parameter', 'create_global_var', 'cast', + 'tensor_array_to_tensor', 'concat', 'sums', 'assign', + 'fill_constant_batch_size_like', 'fill_constant', 'argmin', 'argmax', + 'argsort', 'ones', 'zeros', 'reverse', 'has_inf', 'has_nan', 'isfinite' ] @@ -193,6 +193,60 @@ def concat(input, axis=0, name=None): return out +def tensor_array_to_tensor(input, axis=1, name=None): + """ + This function concatenates the input LodTensorArray along the axis mentioned + and returns that as the output. + + A simple example as below: + + .. code-block:: text + + Given: + + input.data = {[[0.6, 0.1, 0.3], + [0.5, 0.3, 0.2]], + [[1.3], + [1.8]], + [[2.3, 2.1], + [2.5, 2.4]]} + + axis = 1 + + Then: + + output.data = [[0.6, 0.1, 0.3, 1.3, 2.3, 2.1], + [0.5, 0.3, 0.2, 1.8, 2.5, 2.4]] + + output_index.data = [3, 1, 2] + + Args: + input(list): Input LodTensorArray + axis(int): Integer axis along which the tensors will be concatenated + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + Variable: Output variable of the concatenation + Variable: The input LodTensorArray items' dims along the axis + + Examples: + .. 
code-block:: python + + output, output_index = fluid.layers.tensor_array_to_tensor(input=tensor_array) + """ + helper = LayerHelper('tensor_array_to_tensor', **locals()) + out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) + out_index = helper.create_variable_for_type_inference(dtype="int32") + helper.append_op( + type='tensor_array_to_tensor', + inputs={'X': input}, + outputs={'Out': [out], + 'OutIndex': [out_index]}, + attrs={'axis': axis}) + return out, out_index + + def sums(input, out=None): """ This function performs the sum operation on the input and returns the diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 7e2364a5a872cdd8cf590438cc081ab070db767d..da92826d410505c9a80820f655162dd22e6b5966 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -13,21 +13,23 @@ # limitations under the License. from __future__ import print_function -import re -import sys + from collections import defaultdict +from contextlib import contextmanager + from paddle.fluid.framework import Program, Variable, name_scope, default_main_program +from paddle.fluid.distribute_lookup_table import find_distributed_lookup_table + from . import framework from . import layers +from . import unique_name from .backward import append_backward +from .clip import append_gradient_clip_ops, error_clip_callback from .framework import program_guard -from . import unique_name from .initializer import Constant from .layer_helper import LayerHelper -from .regularizer import append_regularization_ops -from .clip import append_gradient_clip_ops, error_clip_callback -from contextlib import contextmanager from .layers import ops +from .regularizer import append_regularization_ops __all__ = [ 'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl', @@ -85,7 +87,7 @@ class Optimizer(object): name=unique_name.generate("learning_rate"), shape=[1], value=float(self._learning_rate), - dtype='float32' if self._dtype == None else self._dtype, + dtype='float32' if self._dtype is None else self._dtype, persistable=True) def _global_learning_rate(self, program=None): @@ -245,6 +247,50 @@ class Optimizer(object): end = len(global_block.ops) return global_block._slice_ops(start, end) + def _process_distribute_lookuptable(self, param_grads, loss, + startup_program): + """ + Because distribute lookup table only support SGD optimizer for now, not support + other optimizer and regularization, so we should find the table parameter out, + and avoid to add regularization and other op for it, and add sgd optimize op + for it independently. + :param param_grads(list((Var, Var))): list of (param, grad) pair. + :param loss: the loss variable. 
+ :param startup_program: the startup program + """ + program = loss.block.program + table_name = find_distributed_lookup_table(program) + table_param = None + table_grad = None + new_param_grads = [] + for p, g in param_grads: + if p.name == table_name: + if table_param is not None: + raise RuntimeError( + "multi dist table var found, only support one now!") + table_param = p + table_grad = g + else: + new_param_grads.append((p, g)) + sgd_op = None + if table_param is not None: + with program_guard(program, startup_program): + param_and_grad = [table_param, table_grad] + with table_param.block.program._optimized_guard(param_and_grad), \ + framework.name_scope("optimizer"): + self._create_global_learning_rate() + # create the optimize op + sgd_op = loss.block.append_op( + type='sgd', + inputs={ + "Param": table_param, + "Grad": table_grad, + "LearningRate": + self._create_param_lr(param_and_grad) + }, + outputs={"ParamOut": param_and_grad[0]}) + return new_param_grads, (table_param, table_grad), sgd_op + def minimize(self, loss, startup_program=None, @@ -260,6 +306,9 @@ class Optimizer(object): params_grads = sorted(params_grads, key=lambda x: x[0].name) + params_grads, table_param_and_grad, table_optimize_op = \ + self._process_distribute_lookuptable(params_grads, loss, startup_program) + params_grads = append_gradient_clip_ops(params_grads) # Add regularization if any @@ -268,6 +317,9 @@ class Optimizer(object): optimize_ops = self._create_optimization_pass(params_grads, loss, startup_program) + if table_optimize_op is not None: + optimize_ops.append(table_optimize_op) + params_grads.append(table_param_and_grad) return optimize_ops, params_grads diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py index f63387a90617dc4e9b7c9ee7caa2d01595237a03..42ab9b231153f7ede7b8f8dd4e754f8cc92f65fe 100644 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py @@ -38,7 +38,7 @@ depth = 8 mix_hidden_lr = 1e-3 IS_SPARSE = True -PASS_NUM = 10 +PASS_NUM = 1 BATCH_SIZE = 10 embedding_name = 'emb' diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index 28dc7519571d8b5464e92fddf634ba46691ceaa9..982d29180141d052e25ea3dcba6e3e7ce4181c48 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -128,6 +128,24 @@ class TestPriorBox(unittest.TestCase): assert box.shape[3] == 4 +class TestDensityPriorBox(unittest.TestCase): + def test_density_prior_box(self): + data_shape = [3, 224, 224] + images = fluid.layers.data( + name='pixel', shape=data_shape, dtype='float32') + conv1 = fluid.layers.conv2d(images, 3, 3, 2) + box, var = layers.density_prior_box( + input=conv1, + image=images, + densities=[3, 4], + fixed_sizes=[50., 60.], + fixed_ratios=[1.0], + clip=True) + assert len(box.shape) == 4 + assert box.shape == var.shape + assert box.shape[3] == 4 + + class TestAnchorGenerator(unittest.TestCase): def test_anchor_generator(self): data_shape = [3, 224, 224] diff --git a/python/paddle/fluid/tests/unittests/test_density_prior_box_op.py b/python/paddle/fluid/tests/unittests/test_density_prior_box_op.py new file mode 100644 index 0000000000000000000000000000000000000000..79d1fd3d7171e06a88a75cf50b6a51ef4da51f07 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_density_prior_box_op.py @@ -0,0 +1,142 @@ +# Copyright (c) 2018 PaddlePaddle Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import sys +import math +from op_test import OpTest + + +class TestDensityPriorBoxOp(OpTest): + def set_data(self): + self.init_test_params() + self.init_test_input() + self.init_test_output() + self.inputs = {'Input': self.input, 'Image': self.image} + + self.attrs = { + 'variances': self.variances, + 'clip': self.clip, + 'step_w': self.step_w, + 'step_h': self.step_h, + 'offset': self.offset, + 'densities': self.densities, + 'fixed_sizes': self.fixed_sizes, + 'fixed_ratios': self.fixed_ratios + } + self.outputs = {'Boxes': self.out_boxes, 'Variances': self.out_var} + + def test_check_output(self): + self.check_output() + + def setUp(self): + self.op_type = "density_prior_box" + self.set_data() + + def set_density(self): + self.densities = [] + self.fixed_sizes = [] + self.fixed_ratios = [] + + def init_test_params(self): + self.layer_w = 32 + self.layer_h = 32 + + self.image_w = 40 + self.image_h = 40 + + self.step_w = float(self.image_w) / float(self.layer_w) + self.step_h = float(self.image_h) / float(self.layer_h) + + self.input_channels = 2 + self.image_channels = 3 + self.batch_size = 10 + + self.variances = [0.1, 0.1, 0.2, 0.2] + self.variances = np.array(self.variances, dtype=np.float).flatten() + + self.set_density() + + self.clip = True + self.num_priors = 0 + if len(self.fixed_sizes) > 0 and len(self.densities) > 0: + for density in self.densities: + if len(self.fixed_ratios) > 0: + self.num_priors += len(self.fixed_ratios) * (pow(density, + 2)) + self.offset = 0.5 + + def init_test_input(self): + self.image = np.random.random( + (self.batch_size, self.image_channels, self.image_w, + self.image_h)).astype('float32') + + self.input = np.random.random( + (self.batch_size, self.input_channels, self.layer_w, + self.layer_h)).astype('float32') + + def init_test_output(self): + out_dim = (self.layer_h, self.layer_w, self.num_priors, 4) + out_boxes = np.zeros(out_dim).astype('float32') + out_var = np.zeros(out_dim).astype('float32') + + step_average = int((self.step_w + self.step_h) * 0.5) + for h in range(self.layer_h): + for w in range(self.layer_w): + idx = 0 + c_x = (w + self.offset) * self.step_w + c_y = (h + self.offset) * self.step_h + # Generate density prior boxes with fixed size + for density, fixed_size in zip(self.densities, + self.fixed_sizes): + if (len(self.fixed_ratios) > 0): + for ar in self.fixed_ratios: + shift = int(step_average / density) + box_width_ratio = fixed_size * math.sqrt(ar) + box_height_ratio = fixed_size / math.sqrt(ar) + for di in range(density): + for dj in range(density): + c_x_temp = c_x - step_average / 2.0 + shift / 2.0 + dj * shift + c_y_temp = c_y - step_average / 2.0 + shift / 2.0 + di * shift + out_boxes[h, w, idx, :] = [ + max((c_x_temp - box_width_ratio / 2.0) / + self.image_w, 0), + max((c_y_temp - box_height_ratio / 2.0) + / self.image_h, 0), + min((c_x_temp + box_width_ratio / 2.0) / + 
self.image_w, 1), + min((c_y_temp + box_height_ratio / 2.0) + / self.image_h, 1) + ] + idx += 1 + if self.clip: + out_boxes = np.clip(out_boxes, 0.0, 1.0) + out_var = np.tile(self.variances, + (self.layer_h, self.layer_w, self.num_priors, 1)) + self.out_boxes = out_boxes.astype('float32') + self.out_var = out_var.astype('float32') + + +class TestDensityPriorBox(TestDensityPriorBoxOp): + def set_density(self): + self.densities = [3, 4] + self.fixed_sizes = [1.0, 2.0] + self.fixed_ratios = [1.0] + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index 3a5b6b5cb8ee4f83c26a96e868e7c75933d28c15..d132dd3c48f55c07725515e40faeb5076398adeb 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -567,7 +567,6 @@ class TestDistLookupTable(TestDistLookupTableBase): 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', - 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', 'uniform_random', 'uniform_random', 'recv', 'recv', 'recv', 'fetch_barrier', 'concat', 'fake_init' @@ -639,7 +638,7 @@ class TestAsyncDistLookupTable(TestDistLookupTableBase): # 5 save table self.assertEqual([op.type for op in pserver1.blocks[5].ops], ["save"]) - trainer, _ = self.get_trainer(config) + trainer, trainer_startup = self.get_trainer(config) self.assertEqual(len(trainer.blocks), 1) ops = [ 'split_ids', 'prefetch', 'merge_ids', 'sequence_pool', @@ -653,6 +652,16 @@ class TestAsyncDistLookupTable(TestDistLookupTableBase): 'recv', 'concat' ] self.assertEqual([op.type for op in trainer.blocks[0].ops], ops) + startup_ops = [ + 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', + 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', + 'fill_constant', 'fill_constant', 'fill_constant', 'fill_constant', + 'fill_constant', 'fill_constant', 'uniform_random', + 'uniform_random', 'recv', 'recv', 'recv', 'fetch_barrier', 'concat', + 'fake_init' + ] + self.assertEqual([op.type for op in trainer_startup.blocks[0].ops], + startup_ops) class TestDistLookupTableSliceSize(TestDistLookupTableBase): diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 692f2375f2119c3def1751d92087613cd965bf25..a8fa5436c43d2f05f632b920f67d43d837d28da9 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -369,6 +369,10 @@ class TestBook(unittest.TestCase): with program_guard(program): x = layers.data(name='x', shape=[16], dtype='float32') y = layers.data(name='label', shape=[1], dtype='int64') + loss, softmax = layers.softmax_with_cross_entropy( + x, y, return_softmax=True) + self.assertIsNotNone(loss) + self.assertIsNotNone(softmax) loss = layers.softmax_with_cross_entropy(x, y) self.assertIsNotNone(loss) print(str(program)) @@ -911,6 +915,16 @@ class TestBook(unittest.TestCase): self.assertIsNotNone(data_1) print(str(program)) + def test_bilinear_tensor_product_layer(self): + program = Program() + with program_guard(program): + data = layers.data(name='data', shape=[4], dtype="float32") + + theta = layers.data(name="theta", shape=[5], dtype="float32") + out = 
layers.bilinear_tensor_product(data, theta, 6) + + print(str(program)) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py index c93740669f40aee3a6c143d153cfd0f5bb72dbd9..18d95c94ad36316b7149eb5412260b40a57ac002 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py @@ -21,8 +21,8 @@ import six class TestBase(unittest.TestCase): def main(self, network_func, - iter=100, - iter_per_pe=100, + iter=10, + iter_per_pe=10, use_gpu=True, use_experimental_executor=False): if use_gpu and not fluid.core.is_compiled_with_cuda(): @@ -45,7 +45,7 @@ class TestBase(unittest.TestCase): exe_strategy._dry_run = True exe_strategy.use_experimental_executor = use_experimental_executor pe = fluid.ParallelExecutor( - use_cuda=True, + use_cuda=use_gpu, loss_name=loss.name, main_program=main_prog, exec_strategy=exe_strategy) diff --git a/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py b/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py new file mode 100755 index 0000000000000000000000000000000000000000..b3833f05f1aa3aac7b5bcc5b6fdc138870cc8844 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_similarity_focus_op.py @@ -0,0 +1,217 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import numpy as np +import paddle.fluid.core as core +from op_test import OpTest + + +class TestSimilarityFocusOp(OpTest): + def setUp(self): + self.op_type = "similarity_focus" + batch_size = 2 + x_dim, y_dim, z_dim = 3, 2, 2 + self.inputs = { + 'X': np.array([[[[0.8, 0.1], [0.4, 0.5]], [[0.9, 0.7], [0.9, 0.9]], + [[0.8, 0.9], [0.1, 0.2]]], + [[[0.2, 0.5], [0.3, 0.4]], [[0.9, 0.7], [0.8, 0.4]], + [[0.0, 0.2], [0.4, 0.7]]]]), + } + self.attrs = { + 'axis': 1, + 'indexes': [0], + } + + output = None + for batch in range(batch_size): + res = np.zeros((1, y_dim, z_dim)).astype("float32").reshape(-1) + for index in self.attrs['indexes']: + channel = self.inputs['X'][batch, index, :, :].reshape(-1).copy( + ) + tag1 = [0 for i in range(y_dim)] + tag2 = [0 for i in range(z_dim)] + cnt = 0 + for i in range(channel.size): + index = channel.argmax() + idx1 = index // z_dim + idx2 = index % z_dim + if tag1[idx1] + tag2[idx2] == 0: + tag1[idx1] = 1 + tag2[idx2] = 1 + res[index] = 1 + cnt += 1 + if cnt == min(y_dim, z_dim): + break + channel[index] = -1 + res = res.reshape(1, y_dim, z_dim).repeat([x_dim], axis=0) + res = res.reshape(1, x_dim, y_dim, z_dim) + if output is not None: + output = np.concatenate((output, res), axis=0) + else: + output = res + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + +class TestSimilarityFocusOp_axis1(OpTest): + def setUp(self): + self.op_type = "similarity_focus" + batch_size = 3 + x_dim, y_dim, z_dim = 4, 5, 6 + self.inputs = { + 'X': np.random.random( + (batch_size, x_dim, y_dim, z_dim)).astype("float32"), + } + self.attrs = { + 'axis': 1, + 'indexes': [0, 3], + } + + output = None + for batch in range(batch_size): + res = np.zeros((1, y_dim, z_dim)).astype("float32").reshape(-1) + for index in self.attrs['indexes']: + channel = self.inputs['X'][batch, index, :, :].reshape(-1).copy( + ) + tag1 = [0 for i in range(y_dim)] + tag2 = [0 for i in range(z_dim)] + cnt = 0 + for i in range(channel.size): + index = channel.argmax() + idx1 = index // z_dim + idx2 = index % z_dim + if tag1[idx1] + tag2[idx2] == 0: + tag1[idx1] = 1 + tag2[idx2] = 1 + res[index] = 1 + cnt += 1 + if cnt == min(y_dim, z_dim): + break + channel[index] = -1 + res = res.reshape(1, y_dim, z_dim) + res = res.repeat([x_dim], axis=0) + res = res.reshape(1, x_dim, y_dim, z_dim) + if output is not None: + output = np.concatenate((output, res), axis=0) + else: + output = res + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + +class TestSimilarityFocusOp_axis2(OpTest): + def setUp(self): + self.op_type = "similarity_focus" + batch_size = 6 + x_dim, y_dim, z_dim = 7, 8, 9 + self.inputs = { + 'X': np.random.random( + (batch_size, x_dim, y_dim, z_dim)).astype("float32"), + } + self.attrs = { + 'axis': 2, + 'indexes': [0, 3, 5], + } + + output = None + for batch in range(batch_size): + res = np.zeros((x_dim, 1, z_dim)).astype("float32").reshape(-1) + for index in self.attrs['indexes']: + channel = self.inputs['X'][batch, :, index, :].reshape(-1).copy( + ) + tag1 = [0 for i in range(x_dim)] + tag2 = [0 for i in range(z_dim)] + cnt = 0 + for i in range(channel.size): + index = channel.argmax() + idx1 = index // z_dim + idx2 = index % z_dim + if tag1[idx1] + tag2[idx2] == 0: + tag1[idx1] = 1 + tag2[idx2] = 1 + res[index] = 1 + cnt += 1 + if cnt == min(x_dim, z_dim): + break + channel[index] = -1 + res = res.reshape(x_dim, 1, z_dim) + res = res.repeat([y_dim], axis=1) + res 
= res.reshape(1, x_dim, y_dim, z_dim) + if output is not None: + output = np.concatenate((output, res), axis=0) + else: + output = res + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + +class TestSimilarityFocusOp_axis3(OpTest): + def setUp(self): + self.op_type = "similarity_focus" + batch_size = 64 + x_dim, y_dim, z_dim = 48, 48, 13 + self.inputs = { + 'X': np.random.random( + (batch_size, x_dim, y_dim, z_dim)).astype("float32"), + } + self.attrs = { + 'axis': 3, + 'indexes': [0, 2, 7, 9], + } + + output = None + for batch in range(batch_size): + res = np.zeros((x_dim, y_dim, 1)).astype("float32").reshape(-1) + for index in self.attrs['indexes']: + channel = self.inputs['X'][batch, :, :, index].reshape(-1).copy( + ) + tag1 = [0 for i in range(x_dim)] + tag2 = [0 for i in range(y_dim)] + cnt = 0 + for i in range(channel.size): + index = channel.argmax() + idx1 = index // y_dim + idx2 = index % y_dim + if tag1[idx1] + tag2[idx2] == 0: + tag1[idx1] = 1 + tag2[idx2] = 1 + res[index] = 1 + cnt += 1 + if cnt == min(x_dim, y_dim): + break + channel[index] = -1 + res = res.reshape(x_dim, y_dim, 1) + res = res.repeat([z_dim], axis=2) + res = res.reshape(1, x_dim, y_dim, z_dim) + if output is not None: + output = np.concatenate((output, res), axis=0) + else: + output = res + self.outputs = {'Out': output} + + def test_check_output(self): + self.check_output() + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py new file mode 100644 index 0000000000000000000000000000000000000000..78b95de7e07b1d1fcdeeae63498e740c2b474c6d --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_tensor_array_to_tensor.py @@ -0,0 +1,142 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import numpy +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.op import Operator +from paddle.fluid.executor import Executor + + +class TestLoDTensorArrayConcat(unittest.TestCase): + def setUp(self): + self.op_type = "tensor_array_to_tensor" + self.attrs = {"axis": 0} + self.outputs = ["Out"] + + def test_get_set(self): + scope = core.Scope() + program = fluid.Program() + block = program.global_block() + + input_arr = block.create_var( + name="tmp_lod_tensor_array", + type=core.VarDesc.VarType.LOD_TENSOR_ARRAY) + input_arr.persistable = True + input_arr_var = scope.var('tmp_lod_tensor_array') + input_tensor_array = input_arr_var.get_lod_tensor_array() + self.assertEqual(0, len(input_tensor_array)) + + cpu = core.CPUPlace() + for i in range(10): + t = core.LoDTensor() + if i == 0: + t.set(numpy.array([[i], [i]], dtype='float32'), cpu) + else: + t.set(numpy.array([[i]], dtype='float32'), cpu) + input_tensor_array.append(t) + + self.assertEqual(10, len(input_tensor_array)) + + random_grad = numpy.random.random_sample([11]).astype(numpy.float32) + + y_out = block.create_var(name="Out") + y_out.persistable = True + y_out_index = block.create_var(name="OutIndex") + y_out_index.persistable = True + + y_grad_arr = block.create_var( + name='Out@GRAD', dtype='float32', shape=[11]) + y_grad_arr.persistable = True + y_grad = scope.var('Out@GRAD') + y_grad_tensor = y_grad.get_tensor() + y_grad_tensor.set(random_grad, cpu) + + op = block.append_op( + type=self.op_type, + inputs={"X": input_arr}, + outputs={"Out": y_out, + "OutIndex": y_out_index}, + attrs=self.attrs) + + out_grad = block.create_var( + name="tmp_lod_tensor_array@GRAD", + type=core.VarDesc.VarType.LOD_TENSOR_ARRAY) + out_grad.persistable = True + + grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(op.desc, + set(), []) + grad_op_desc = grad_op_desc_list[0] + new_op_desc = block.desc.append_op() + new_op_desc.copy_from(grad_op_desc) + for var_name in grad_op_desc.output_arg_names(): + block.desc.var(var_name.encode("ascii")) + + grad_op_desc.infer_var_type(block.desc) + grad_op_desc.infer_shape(block.desc) + for arg in grad_op_desc.output_arg_names(): + grad_var = block.desc.find_var(arg.encode("ascii")) + grad_var.set_dtype(core.VarDesc.VarType.FP32) + + fetch_list = [] + fetch_list.append(block.var('Out')) + fetch_list.append(block.var('OutIndex')) + + exe = fluid.Executor(fluid.CPUPlace()) + out = exe.run(program, fetch_list=fetch_list, scope=scope) + #print ("index: ", numpy.array(out[1])) + + # test forward + tensor_res = numpy.array(out[0]) + tensor_res_out_idx = numpy.array(out[1]) + tensor_gt = numpy.array( + [0] + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='float32') + + self.assertEqual(len(tensor_res), len(tensor_gt)) + self.assertEqual(len(tensor_res_out_idx), 10) + + for i in range(len(tensor_res)): + self.assertEqual(tensor_res[i], tensor_gt[i]) + + for i in range(len(tensor_res_out_idx)): + if i == 0: + self.assertEqual(tensor_res_out_idx[i], 2) + else: + self.assertEqual(tensor_res_out_idx[i], 1) + + # test backward + grad_tensor = scope.var('tmp_lod_tensor_array@GRAD') + grad_tensor_array = grad_tensor.get_lod_tensor_array() + + self.assertEqual(10, len(grad_tensor_array)) + + for i in range(len(grad_tensor_array)): + if i == 0: + self.assertEqual( + numpy.array(grad_tensor_array[i])[0], + numpy.array(random_grad[i])) + self.assertEqual( + numpy.array(grad_tensor_array[i])[1], + numpy.array(random_grad[i + 1])) + if i == 1: + 
self.assertEqual( + numpy.array(grad_tensor_array[i]), + numpy.array(random_grad[i + 1])) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 094eaeb59ce7ab73012f6e6a5fc24778933270c1..89bc24802751340b6d4657be8673d714f3d3dc2b 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -31,18 +31,17 @@ Steps to transpile pserver: """ import math -import sys import numpy as np import collections -import six import logging -from .ps_dispatcher import RoundRobin, HashName, PSDispatcher +from .ps_dispatcher import RoundRobin, PSDispatcher from .. import core, framework, unique_name from ..framework import Program, default_main_program, \ default_startup_program, Block, \ Parameter, grad_var_name from .details import * +from ..distribute_lookup_table import find_distributed_lookup_table from functools import reduce LOOKUP_TABLE_TYPE = "lookup_table" @@ -292,7 +291,8 @@ class DistributeTranspiler(object): self.optimize_ops, self.params_grads = self._get_optimize_pass() ps_dispatcher = self.config.split_method(self.pserver_endpoints) - self.has_distributed_lookup_table = self._has_distributed_lookup_table() + self.table_name = find_distributed_lookup_table(self.origin_program) + self.has_distributed_lookup_table = self.table_name != None self.param_name_to_grad_name = dict() self.grad_name_to_param_name = dict() for param_var, grad_var in self.params_grads: @@ -966,28 +966,6 @@ to transpile() call.") # ====================== private transpiler functions ===================== - def _has_distributed_lookup_table(self): - # process lookup_table_op - # 1. check all lookup_table_op is distributed - # 2. check all lookup_table_op share the same table. - distributed_lookup_table_ops = [] - # support only one distributed_lookup_table now - self.table_name = None - for op in self.origin_program.global_block().ops: - if op.type == LOOKUP_TABLE_TYPE: - if op.attr('is_distributed') is True: - if self.table_name is None: - self.table_name = op.input("W")[0] - if self.table_name != op.input("W")[0]: - raise RuntimeError("all distributed lookup_table_ops" - " should have only one table") - distributed_lookup_table_ops.append(op) - else: - if self.table_name is not None: - assert op.input("W")[0] != self.table_name - - return len(distributed_lookup_table_ops) > 0 - def _update_dist_lookup_table_vars(self, param_list, grad_list, params_grads): # TODO(wuyi): put find a way to put dist lookup table stuff all together. @@ -1341,7 +1319,6 @@ to transpile() call.") """ create a new block to handle save checkpoint. """ - import os pserver_program.global_block().create_var( name="kLookupTablePath", diff --git a/python/setup.py.in b/python/setup.py.in index b1ff9f3a5c3d877edb6bc6a12efce053a44b4c9c..c623057d5081a6fedcd90eb5f5d53531a5d62bb8 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -174,6 +174,18 @@ if '${CMAKE_BUILD_TYPE}' == 'Release': raise Exception("patch libmkldnn.so failed, command: %s" % command) package_data['paddle.libs']+=['libmkldnn.so.0'] shutil.copy('${MKLDNN_SHARED_LIB}', libs_path) +if '${WITH_NGRAPH}' == 'ON': + if '${CMAKE_BUILD_TYPE}' == 'Release': + # only change rpath in Release mode. 
+ command = "patchelf --set-rpath '$ORIGIN/' ${NGRAPH_SHARED_LIB}" + if os.system(command) != 0: + raise Exception("patch ${NGRAPH_SHARED_LIB_NAME} failed, command: %s" % command) + shutil.copy('${NGRAPH_SHARED_LIB}', libs_path) + shutil.copy('${NGRAPH_CPU_LIB}', libs_path) + shutil.copy('${NGRAPH_TBB_LIB}', libs_path) + package_data['paddle.libs']+=['${NGRAPH_SHARED_LIB_NAME}', + '${NGRAPH_CPU_LIB_NAME}', + '${NGRAPH_TBB_LIB_NAME}'] # remove unused paddle/libs/__init__.py os.remove(libs_path+'/__init__.py') package_dir['paddle.libs']=libs_path