diff --git a/CMakeLists.txt b/CMakeLists.txt index efa68c9ba243af3c7cdca52b915cc14d307ae89f..1594e798a2ba3f735a28a43ef933d80b3b3f8964 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,7 +54,7 @@ option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON) option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF) option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF) option(WITH_TIMER "Compile PaddlePaddle with stats timer" OFF) -option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler" OFF) +option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" OFF) option(WITH_DOC "Compile PaddlePaddle with documentation" OFF) option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF) option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF) @@ -254,6 +254,12 @@ elseif() set(WITH_ANAKIN OFF CACHE STRING "Anakin is used in MKL only now." FORCE) endif() +if (WITH_PROFILER) + find_package(Gperftools REQUIRED) + include_directories(${GPERFTOOLS_INCLUDE_DIR}) + add_definitions(-DWITH_GPERFTOOLS) +endif() + include(generic) # simplify cmake module include(package) # set paddle packages include(ccache) # set ccache for compilation diff --git a/cmake/FindGperftools.cmake b/cmake/FindGperftools.cmake new file mode 100644 index 0000000000000000000000000000000000000000..928f573a4fb82391859e334d50e6c8ed0e26aae2 --- /dev/null +++ b/cmake/FindGperftools.cmake @@ -0,0 +1,63 @@ +# Tries to find Gperftools. +# +# Usage of this module as follows: +# +# find_package(Gperftools) +# +# Variables used by this module, they can change the default behaviour and need +# to be set before calling find_package: +# +# Gperftools_ROOT_DIR Set this variable to the root installation of +# Gperftools if the module has problems finding +# the proper installation path. +# +# Variables defined by this module: +# +# GPERFTOOLS_FOUND System has Gperftools libs/headers +# GPERFTOOLS_LIBRARIES The Gperftools libraries (tcmalloc & profiler) +# GPERFTOOLS_INCLUDE_DIR The location of Gperftools headers + +find_library(GPERFTOOLS_TCMALLOC + NAMES tcmalloc + HINTS ${Gperftools_ROOT_DIR}/lib) + +find_library(GPERFTOOLS_PROFILER + NAMES profiler + HINTS ${Gperftools_ROOT_DIR}/lib) + +find_library(GPERFTOOLS_TCMALLOC_AND_PROFILER + NAMES tcmalloc_and_profiler + HINTS ${Gperftools_ROOT_DIR}/lib) + +find_path(GPERFTOOLS_INCLUDE_DIR + NAMES gperftools/heap-profiler.h + HINTS ${Gperftools_ROOT_DIR}/include) + +set(GPERFTOOLS_LIBRARIES ${GPERFTOOLS_TCMALLOC_AND_PROFILER}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + Gperftools + DEFAULT_MSG + GPERFTOOLS_LIBRARIES + GPERFTOOLS_INCLUDE_DIR) + +mark_as_advanced( + Gperftools_ROOT_DIR + GPERFTOOLS_TCMALLOC + GPERFTOOLS_PROFILER + GPERFTOOLS_TCMALLOC_AND_PROFILER + GPERFTOOLS_LIBRARIES + GPERFTOOLS_INCLUDE_DIR) + +# create IMPORTED targets +if (Gperftools_FOUND AND NOT TARGET gperftools::tcmalloc) + add_library(gperftools::tcmalloc UNKNOWN IMPORTED) + set_target_properties(gperftools::tcmalloc PROPERTIES + IMPORTED_LOCATION ${GPERFTOOLS_TCMALLOC} + INTERFACE_INCLUDE_DIRECTORIES "${GPERFTOOLS_INCLUDE_DIR}") + add_library(gperftools::profiler UNKNOWN IMPORTED) + set_target_properties(gperftools::profiler PROPERTIES + IMPORTED_LOCATION ${GPERFTOOLS_PROFILER} + INTERFACE_INCLUDE_DIRECTORIES "${GPERFTOOLS_INCLUDE_DIR}") +endif() diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 4e17ddee73958106d5e2c8c8ea5661acc758518a..51f7a61631d7102b60646abe1c6dd7775692f157 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -86,6 +86,7 @@ endif(NOT WITH_GOLANG) if(WITH_GPU) add_definitions(-DPADDLE_WITH_CUDA) + add_definitions(-DEIGEN_USE_GPU) FIND_PACKAGE(CUDA REQUIRED) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 312fbaa0b3d83c37debe78be82503103eabc0bfa..a8b9dcfcf5eec39af0f59c03b1ed9bd4b71ee7bf 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -110,6 +110,14 @@ function(find_fluid_modules TARGET_NAME) endif() endfunction(find_fluid_modules) + +function(common_link TARGET_NAME) + if (WITH_PROFILER) + target_link_libraries(${TARGET_NAME} gperftools::profiler) + endif() +endfunction() + + # find all third_party modules is used for paddle static library # for reduce the dependency when building the inference libs. set_property(GLOBAL PROPERTY FLUID_THIRD_PARTY) @@ -274,6 +282,7 @@ function(cc_library TARGET_NAME) endif() target_link_libraries(${TARGET_NAME} ${cc_library_DEPS}) add_dependencies(${TARGET_NAME} ${cc_library_DEPS}) + common_link(${TARGET_NAME}) endif() # cpplint code style @@ -340,6 +349,7 @@ function(cc_binary TARGET_NAME) if(cc_binary_DEPS) target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS}) add_dependencies(${TARGET_NAME} ${cc_binary_DEPS}) + common_link(${TARGET_NAME}) endif() endfunction(cc_binary) @@ -362,6 +372,7 @@ function(cc_test TARGET_NAME) target_link_libraries(${TARGET_NAME} ${win32_deps}) endif(WIN32) add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) + common_link(${TARGET_NAME}) add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME} ${cc_test_ARGS} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) @@ -420,6 +431,7 @@ function(nv_binary TARGET_NAME) if(nv_binary_DEPS) target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS}) add_dependencies(${TARGET_NAME} ${nv_binary_DEPS}) + common_link(${TARGET_NAME}) endif() endif() endfunction(nv_binary) @@ -433,6 +445,7 @@ function(nv_test TARGET_NAME) cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS}) target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) + common_link(${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME}) if (nv_test_SERIAL) set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1) @@ -499,6 +512,7 @@ function(hip_binary TARGET_NAME) if(hip_binary_DEPS) target_link_libraries(${TARGET_NAME} ${hip_binary_DEPS}) add_dependencies(${TARGET_NAME} ${hip_binary_DEPS}) + common_link(${TARGET_NAME}) endif() endif() endfunction(hip_binary) @@ -518,6 +532,7 @@ function(hip_test TARGET_NAME) set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP) target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags) add_dependencies(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags) + common_link(${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME}) endif() endfunction(hip_test) @@ -560,6 +575,7 @@ function(go_library TARGET_NAME) endif() if(go_library_DEPS) add_dependencies(${TARGET_NAME} ${go_library_DEPS}) + common_link(${TARGET_NAME}) endif(go_library_DEPS) # The "source file" of the library is `${dummyfile}` which never diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 7458b69af8cfb763744e4534c783226b5f3db449..eb4baa06b5284512eab128e57f893bad43afda97 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -31,13 +31,36 @@ limitations under the License. */ #include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h" #include "paddle/fluid/platform/profiler.h" +#ifdef WITH_GPERFTOOLS +#include "gperftools/profiler.h" +#endif +DEFINE_string(pe_profile_fname, "", + "Profiler filename for PE, which generated by gperftools." + "Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable."); + namespace paddle { namespace framework { +static std::once_flag gProfileOnce; +#ifdef WITH_GPERFTOOLS +static bool gProfileStarted = false; +#endif class ParallelExecutorPrivate { public: explicit ParallelExecutorPrivate(const std::vector &places) - : places_(places) {} + : places_(places) { + if (!FLAGS_pe_profile_fname.empty()) { + std::call_once(gProfileOnce, [] { +#ifdef WITH_GPERFTOOLS + ProfilerStart(FLAGS_pe_profile_fname.c_str()); + gProfileStarted = true; +#else + LOG(WARNING) << "Paddle is not compiled with gperftools. " + "FLAGS_pe_profile_fname will be ignored"; +#endif + }); + } + } ~ParallelExecutorPrivate() { if (own_local_scope_) { @@ -341,6 +364,12 @@ void ParallelExecutor::BCastParamsToDevices( void ParallelExecutor::Run(const std::vector &fetch_tensors, const std::string &fetched_var_name) { +#ifdef WITH_GPERFTOOLS + if (gProfileStarted) { + ProfilerFlush(); + } +#endif + platform::RecordBlock b(0); if (member_->HasGarbageCollectors()) { member_->ResetRuntimeReferenceCount(fetch_tensors, fetched_var_name); diff --git a/paddle/fluid/operators/bilinear_tensor_product_op.cu b/paddle/fluid/operators/bilinear_tensor_product_op.cu index 9426ffbe174c7daf9f24525f5f7ca12d986042f4..c2b4f69e6854522b91dfd9fb5f738c0e5ffc77b1 100644 --- a/paddle/fluid/operators/bilinear_tensor_product_op.cu +++ b/paddle/fluid/operators/bilinear_tensor_product_op.cu @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#define EIGEN_USE_GPU #include "paddle/fluid/operators/bilinear_tensor_product_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/cos_sim_op.cu b/paddle/fluid/operators/cos_sim_op.cu index 82205e9c75402e368a2d1e161d471e35ff7356ea..3d144ca29d9989ad2cbb438a950860eaac873d07 100644 --- a/paddle/fluid/operators/cos_sim_op.cu +++ b/paddle/fluid/operators/cos_sim_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/cos_sim_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/crop_op.cu b/paddle/fluid/operators/crop_op.cu index b75678217e36aa2297c68a7f8e2a9dfafadaca72..66cb5c452de4b2107693127ce414daf9fb7cd7d8 100644 --- a/paddle/fluid/operators/crop_op.cu +++ b/paddle/fluid/operators/crop_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/crop_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/dropout_op.cu b/paddle/fluid/operators/dropout_op.cu index e011f47e086183a4ef3a3373c17acd6c21b6cf7e..d65491267de1ce3495d8b8250cf0cff570dfcc6a 100644 --- a/paddle/fluid/operators/dropout_op.cu +++ b/paddle/fluid/operators/dropout_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include #include #include diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cu b/paddle/fluid/operators/elementwise/elementwise_add_op.cu index 2fb7eeb4b9e3119a6eea3e69a2a6002a80f6c0f3..fed12785f47e1b8eea3f053712830901bee3bdc9 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/elementwise/elementwise_add_op.h" #include "paddle/fluid/platform/float16.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.cu b/paddle/fluid/operators/elementwise/elementwise_div_op.cu index c5a1a7e08d89f3ef205af4c37246f8fa288189f3..1a149298fd33f132a90ff5de3b35dd5894a4ae68 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_div_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/elementwise/elementwise_div_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/elementwise/elementwise_max_op.cu b/paddle/fluid/operators/elementwise/elementwise_max_op.cu index a90dcd3ecf0da114110db5946e111a8b3a925e42..5d086a1b29febd8e57507eced7683f414ca34e07 100644 --- a/paddle/fluid/operators/elementwise/elementwise_max_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_max_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/elementwise/elementwise_max_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op.cu b/paddle/fluid/operators/elementwise/elementwise_min_op.cu index ab77709c28c15a925bd3deac07c43e12b12cb781..cf93e5a97a3f3110aae907c593f58dbab0f9d090 100644 --- a/paddle/fluid/operators/elementwise/elementwise_min_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_min_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/elementwise/elementwise_min_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.cu b/paddle/fluid/operators/elementwise/elementwise_mul_op.cu index 4d16bc38e1d8e4cbbe3afbe08f233e14329e0f2e..833c4072826c58277bc23e03b787fafbbaa73d03 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/elementwise/elementwise_mul_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/elementwise/elementwise_pow_op.cu b/paddle/fluid/operators/elementwise/elementwise_pow_op.cu index 6ee0779f23bc2c734aa1d439abb12f366227e686..9263dbfebfd00451f3e67c3ca9d2081b5b4904bd 100644 --- a/paddle/fluid/operators/elementwise/elementwise_pow_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_pow_op.cu @@ -8,8 +8,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/elementwise/elementwise_pow_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op.cu b/paddle/fluid/operators/elementwise/elementwise_sub_op.cu index 8d9bf7c4d81d49d83b5d1cf0369be5c9957242b4..6f17d3292f307b009c640738109d5a4f4ca4caa9 100644 --- a/paddle/fluid/operators/elementwise/elementwise_sub_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/elementwise/elementwise_sub_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/expand_op.cu b/paddle/fluid/operators/expand_op.cu index 60363bfc86d7d1a79d7b018cee43a41c1247a994..d95c9b61802b5fe7059e1c95a50776db5aa7ad93 100644 --- a/paddle/fluid/operators/expand_op.cu +++ b/paddle/fluid/operators/expand_op.cu @@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU - #include "paddle/fluid/operators/expand_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/gru_unit_op.cu b/paddle/fluid/operators/gru_unit_op.cu index fc92b3d4a7a5a933f31b21d18238de386b3afb4d..37689901ecbeeda44f52a2fc7a686f4edf6682bb 100644 --- a/paddle/fluid/operators/gru_unit_op.cu +++ b/paddle/fluid/operators/gru_unit_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/gru_unit_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/hinge_loss_op.cu b/paddle/fluid/operators/hinge_loss_op.cu index 9c0a85bee6e28865225c1848ea5a378f48932ceb..b5ea0a702e0e540c1831ca241a5def19f86c239c 100644 --- a/paddle/fluid/operators/hinge_loss_op.cu +++ b/paddle/fluid/operators/hinge_loss_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/hinge_loss_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/huber_loss_op.cu b/paddle/fluid/operators/huber_loss_op.cu index 659464df9dc0e7c8cd276bd0bbf7072361aa3abf..09c743c4275169ba8c53ccbd428100b2fc4483d6 100644 --- a/paddle/fluid/operators/huber_loss_op.cu +++ b/paddle/fluid/operators/huber_loss_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/huber_loss_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/im2sequence_op.cu b/paddle/fluid/operators/im2sequence_op.cu index e0a5a90c1c3c47ea45b3f83ae969c1861783ff60..1c34640618d58d3b5fe627fa6596260a7b687d05 100644 --- a/paddle/fluid/operators/im2sequence_op.cu +++ b/paddle/fluid/operators/im2sequence_op.cu @@ -11,8 +11,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/im2sequence_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/isfinite_op.cu b/paddle/fluid/operators/isfinite_op.cu index 8d1268b18c6fec03063051f545075209a6fcde27..995969cd42f08c7fa948262e42793106e745b3a7 100644 --- a/paddle/fluid/operators/isfinite_op.cu +++ b/paddle/fluid/operators/isfinite_op.cu @@ -11,8 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/isfinite_op.h" #include "paddle/fluid/platform/float16.h" diff --git a/paddle/fluid/operators/l1_norm_op.cu b/paddle/fluid/operators/l1_norm_op.cu index 1b48571dd7378c1c2a6628662024bc7bcc08d2a6..a5c29bbf5debdd11f6e5b28b3a8b48c2c484517a 100644 --- a/paddle/fluid/operators/l1_norm_op.cu +++ b/paddle/fluid/operators/l1_norm_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/l1_norm_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/log_loss_op.cu b/paddle/fluid/operators/log_loss_op.cu index e8bf7d8159bf8b16bf4397e7765918c060124db3..280913c43a2749ddd5fbd3ae1905f1b823dd525d 100644 --- a/paddle/fluid/operators/log_loss_op.cu +++ b/paddle/fluid/operators/log_loss_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/log_loss_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/math/context_project.cu b/paddle/fluid/operators/math/context_project.cu index 16205c0e145ef70666d4eca564488d80bde26d2e..f04b2d15349be329ee228fc8903c9b38a5349634 100644 --- a/paddle/fluid/operators/math/context_project.cu +++ b/paddle/fluid/operators/math/context_project.cu @@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU - #include "paddle/fluid/operators/math/context_project.h" namespace paddle { diff --git a/paddle/fluid/operators/math/jit_kernel_layer_norm.cc b/paddle/fluid/operators/math/jit_kernel_layer_norm.cc index fead13ebadcd131afafc308740cdd39b1c53bc08..cb49e66488bd69d92430cbf6de1d08348ffe0202 100644 --- a/paddle/fluid/operators/math/jit_kernel_layer_norm.cc +++ b/paddle/fluid/operators/math/jit_kernel_layer_norm.cc @@ -79,16 +79,16 @@ class LayerNormKernelImpl : public LayerNormKernel { } }; -#define INTRIAVX_FLOAT(isa, block) \ +#define INTRIAVX_FLOAT(isa, jit_block) \ template <> \ - LayerNormKernelImpl::LayerNormKernelImpl(int right) \ + LayerNormKernelImpl::LayerNormKernelImpl(int right) \ : LayerNormKernel() { \ this->num_ = right; \ this->rest_ = this->num_ % YMM_FLOAT_BLOCK; \ this->end_ = this->num_ - this->rest_; \ } \ template <> \ - void LayerNormKernelImpl::Compute( \ + void LayerNormKernelImpl::Compute( \ float* x, float* out, float* mean, float* var, const float* scale, \ const float* bias, int height, const float epsilon) const { \ __m256 sum; \ @@ -97,6 +97,7 @@ class LayerNormKernelImpl : public LayerNormKernel { __m256 tmp; \ size_t offset; \ size_t j; \ + size_t block = YMM_FLOAT_BLOCK; \ __m256 reverse_num_vec = \ _mm256_div_ps(_mm256_set1_ps(1.0), _mm256_set1_ps(this->num_)); \ __m256 epsilon_vec = _mm256_set1_ps(epsilon); \ @@ -221,12 +222,14 @@ INTRIAVX_FLOAT(platform::avx, kEQ8); INTRIAVX_FLOAT(platform::avx, kGT8LT16); INTRIAVX_FLOAT(platform::avx, kEQ16); INTRIAVX_FLOAT(platform::avx, kGT16); -#endif -#ifdef __AVX2__ INTRIAVX_FLOAT(platform::avx2, kEQ8); INTRIAVX_FLOAT(platform::avx2, kGT8LT16); INTRIAVX_FLOAT(platform::avx2, kEQ16); INTRIAVX_FLOAT(platform::avx2, kGT16); +INTRIAVX_FLOAT(platform::avx512f, kEQ8); +INTRIAVX_FLOAT(platform::avx512f, kGT8LT16); +INTRIAVX_FLOAT(platform::avx512f, kEQ16); +INTRIAVX_FLOAT(platform::avx512f, kGT16); #endif #undef INTRIAVX_FLOAT diff --git a/paddle/fluid/operators/math/math_function.cu b/paddle/fluid/operators/math/math_function.cu index 79b7538ad05b0ff348b8264d50b63211b5254e80..9372d63f0bea2b0c9f37d47376d7b7014e381a33 100644 --- a/paddle/fluid/operators/math/math_function.cu +++ b/paddle/fluid/operators/math/math_function.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/operators/math/blas.h" diff --git a/paddle/fluid/operators/math/sequence2batch.cu b/paddle/fluid/operators/math/sequence2batch.cu index be73adfc0cbe37ed8831b5ad34e66bc95e342e9d..9ab13659c1cc5b59d28395bcebcfb43fac5b4544 100644 --- a/paddle/fluid/operators/math/sequence2batch.cu +++ b/paddle/fluid/operators/math/sequence2batch.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/math/sequence2batch.h" namespace paddle { diff --git a/paddle/fluid/operators/math/softmax.cu b/paddle/fluid/operators/math/softmax.cu index 2e9669049e36478549b793e3fa76220825888e21..71d137398267f61d8cc01907d6a9498eef8d62dc 100644 --- a/paddle/fluid/operators/math/softmax.cu +++ b/paddle/fluid/operators/math/softmax.cu @@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU - #include #include "paddle/fluid/operators/math/math_function.h" diff --git a/paddle/fluid/operators/mean_op.cu b/paddle/fluid/operators/mean_op.cu index 413b8ace67bd0a36849373812950834523b62216..921c2e1298906655767c1e7f30dc34b2c564c671 100644 --- a/paddle/fluid/operators/mean_op.cu +++ b/paddle/fluid/operators/mean_op.cu @@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU - #include "paddle/fluid/operators/mean_op.h" #include "paddle/fluid/platform/float16.h" diff --git a/paddle/fluid/operators/optimizers/adadelta_op.cu b/paddle/fluid/operators/optimizers/adadelta_op.cu index 3fbfee5df05770a1206ab3170d3baffdd20bc77b..562a157f063b44d65254d556d44439eee3636c4c 100644 --- a/paddle/fluid/operators/optimizers/adadelta_op.cu +++ b/paddle/fluid/operators/optimizers/adadelta_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/optimizers/adadelta_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/optimizers/adagrad_op.cu b/paddle/fluid/operators/optimizers/adagrad_op.cu index 4efe56855a4bdca41d24f02c29a618a8d4232887..5043468d4c5f721ae0906b1a319eb3ec10b26580 100644 --- a/paddle/fluid/operators/optimizers/adagrad_op.cu +++ b/paddle/fluid/operators/optimizers/adagrad_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" #include "paddle/fluid/operators/optimizers/adagrad_op.h" diff --git a/paddle/fluid/operators/optimizers/adam_op.cu b/paddle/fluid/operators/optimizers/adam_op.cu index e8090ebacfe85153aba9e275c9cd1c55fd7af15e..4eb2db717d45a730798eef48d3d10bce9d387c4b 100644 --- a/paddle/fluid/operators/optimizers/adam_op.cu +++ b/paddle/fluid/operators/optimizers/adam_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/optimizers/adam_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/optimizers/adamax_op.cu b/paddle/fluid/operators/optimizers/adamax_op.cu index e54adcb142fe0d50dad23fe5df14bd6f28220d8a..80e0219d4414db2909b5babc22599d8c0d906c7d 100644 --- a/paddle/fluid/operators/optimizers/adamax_op.cu +++ b/paddle/fluid/operators/optimizers/adamax_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/optimizers/adamax_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cu b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cu index 84d65e39329659f82099011f9ec60468d5db6328..dc568802a2b19fee5c8d7fd8d07c929cba8ab4e3 100644 --- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cu +++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/optimizers/decayed_adagrad_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/optimizers/ftrl_op.cu b/paddle/fluid/operators/optimizers/ftrl_op.cu index f836b75df93861a0fd670f2a0e786e6a797a4661..acf8e38ca0f5a3cf9899f4898898013e8a2afdd2 100644 --- a/paddle/fluid/operators/optimizers/ftrl_op.cu +++ b/paddle/fluid/operators/optimizers/ftrl_op.cu @@ -10,8 +10,6 @@ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/optimizers/ftrl_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/optimizers/proximal_adagrad_op.cu b/paddle/fluid/operators/optimizers/proximal_adagrad_op.cu index d1c1f747b70c3ceb806da06e6786a70b62a32995..591dead3b12763e4cd1b9c390a87816ab121fbf8 100644 --- a/paddle/fluid/operators/optimizers/proximal_adagrad_op.cu +++ b/paddle/fluid/operators/optimizers/proximal_adagrad_op.cu @@ -10,8 +10,6 @@ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/optimizers/proximal_adagrad_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/optimizers/proximal_gd_op.cu b/paddle/fluid/operators/optimizers/proximal_gd_op.cu index 7aa0e1015008eba0c1cf63ba1278dc2b8049b20b..d556fa74f19529d0e2f80d4c6dbfca62498c9dcc 100644 --- a/paddle/fluid/operators/optimizers/proximal_gd_op.cu +++ b/paddle/fluid/operators/optimizers/proximal_gd_op.cu @@ -10,8 +10,6 @@ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/optimizers/proximal_gd_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/optimizers/rmsprop_op.cu b/paddle/fluid/operators/optimizers/rmsprop_op.cu index 69e35a309e04f61068d9ff1b6d9f1450d2524253..8b17d6a0204045a9b20adb79dbad72dff5ba267e 100644 --- a/paddle/fluid/operators/optimizers/rmsprop_op.cu +++ b/paddle/fluid/operators/optimizers/rmsprop_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/optimizers/rmsprop_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/pad_constant_like_op.cu b/paddle/fluid/operators/pad_constant_like_op.cu index ea69577904577de353b63491973bf74b7724e18e..9e62a6dc9d34a96c59a08d0e5fd6cdd9f0d6d51d 100644 --- a/paddle/fluid/operators/pad_constant_like_op.cu +++ b/paddle/fluid/operators/pad_constant_like_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/pad_constant_like_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/pad_op.cu b/paddle/fluid/operators/pad_op.cu index 9cddef9cf1d3c43701a4f0ed3f70dcb30c1dbd02..95098a8dca36594c3af60ad8488217e71c673a75 100644 --- a/paddle/fluid/operators/pad_op.cu +++ b/paddle/fluid/operators/pad_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/pad_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/sequence_ops/sequence_pool_op.cu b/paddle/fluid/operators/sequence_ops/sequence_pool_op.cu index 63cd47a38a0ff6413c430c6be6284c5f4bfc2595..4897474a485d8417854ffb53aa8ee64321c78ae7 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pool_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_pool_op.cu @@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU - #include "paddle/fluid/operators/sequence_ops/sequence_pool_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu index 9aadac1a416034a3510dea2916d7577efbc2f8c2..a1fbc7e5fab71df486b53c31464c99e9c4557ccd 100644 --- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu +++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/smooth_l1_loss_op.cu b/paddle/fluid/operators/smooth_l1_loss_op.cu index dfbb5c905884b57413587a4f6c33b0238b740c73..e5df479090fabe926f65f58e2300e3ee2027e54d 100644 --- a/paddle/fluid/operators/smooth_l1_loss_op.cu +++ b/paddle/fluid/operators/smooth_l1_loss_op.cu @@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU - #include "paddle/fluid/operators/smooth_l1_loss_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu index 6d48796191dd13a45f0c7267bfaf05489f528a9d..cee3e87037e0f1439a08b7b275eedefe357a4b13 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu @@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU - #include #include "paddle/fluid/operators/math/cross_entropy.h" #include "paddle/fluid/operators/softmax_with_cross_entropy_op.h" diff --git a/paddle/fluid/operators/split_selected_rows_op.h b/paddle/fluid/operators/split_selected_rows_op.h index af64607fafc6544047714e731846a2440be219b8..1fef2b3d378c96d087118d0136885e7e29aa237c 100644 --- a/paddle/fluid/operators/split_selected_rows_op.h +++ b/paddle/fluid/operators/split_selected_rows_op.h @@ -72,10 +72,11 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel { for (size_t i = 0; i < outs_rows_idx.size(); ++i) { auto rows_idx = outs_rows_idx[i]; outs[i]->set_height(height_sections[i]); + auto dims = x->GetCompleteDims(); + dims[0] = rows_idx.size(); + outs[i]->mutable_value()->mutable_data(dims, x->place()); + outs[i]->mutable_rows()->clear(); if (rows_idx.size() > 0) { - auto dims = x->GetCompleteDims(); - dims[0] = rows_idx.size(); - outs[i]->mutable_value()->mutable_data(dims, x->place()); for (auto idx : rows_idx) { outs[i]->mutable_rows()->push_back(idx - abs_sections[i]); } @@ -98,6 +99,8 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel { } } } + PADDLE_ENFORCE_EQ(rows_idx.size(), outs[i]->rows().size(), + "rows should has the same size with tensor dim 0"); } } }; diff --git a/paddle/fluid/operators/squared_l2_distance_op.cu b/paddle/fluid/operators/squared_l2_distance_op.cu index 3e80ae8dd22077c0f9bbdedc24e84f6c339c5a26..c9264da838246efded7d9f85664faf0dc1cec282 100644 --- a/paddle/fluid/operators/squared_l2_distance_op.cu +++ b/paddle/fluid/operators/squared_l2_distance_op.cu @@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU - #include "paddle/fluid/operators/squared_l2_distance_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/squared_l2_norm_op.cu b/paddle/fluid/operators/squared_l2_norm_op.cu index 87830413da3f141f01a97966ae0e2b0501ed600a..e31cfeb78ab8a8d1b55a198fe7a2c647a3dce665 100644 --- a/paddle/fluid/operators/squared_l2_norm_op.cu +++ b/paddle/fluid/operators/squared_l2_norm_op.cu @@ -11,8 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/squared_l2_norm_op.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/sum_op.cu b/paddle/fluid/operators/sum_op.cu index db4c2d6c115f04b436db00854ca4b02fea09866b..6125ed07b6d0f92fa317c581a06117dcfa7359ae 100644 --- a/paddle/fluid/operators/sum_op.cu +++ b/paddle/fluid/operators/sum_op.cu @@ -8,8 +8,6 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#define EIGEN_USE_GPU #include "paddle/fluid/operators/sum_op.h" #include "paddle/fluid/platform/float16.h" diff --git a/paddle/fluid/platform/cuda_helper_test.cu b/paddle/fluid/platform/cuda_helper_test.cu index ee45afab93d079374aefe366425502890854c28d..466bf90c63c1496883995819cdcb19f846e4a302 100644 --- a/paddle/fluid/platform/cuda_helper_test.cu +++ b/paddle/fluid/platform/cuda_helper_test.cu @@ -93,7 +93,7 @@ TEST(CudaAtomic, float16) { // unalignment of uint8 void TestUnalign(size_t num, const int shift_bit) { - PADDLE_ENFORCE(num % 2 == 0, "must be a multiple of 2"); + ASSERT_EQ(num % 2, 0); float16 *in1, *in2, *out; float16 *d_in1, *d_in2; size_t size = sizeof(uint8_t) * (num + shift_bit); diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 37453a8c29f22371d8931b68e23a98832bd15adb..812e56f1f966d03207cf83ad47cb88e9fa5d55bb 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -21,7 +21,6 @@ limitations under the License. */ #include "paddle/fluid/platform/dynload/cublas.h" #include "paddle/fluid/platform/dynload/cudnn.h" #include "paddle/fluid/platform/gpu_info.h" -#define EIGEN_USE_GPU #endif #ifdef PADDLE_WITH_MKLDNN diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index a85972bdb72ca3119cc14f9e2b810c3875443538..01ee67fd07f848356e801be95d53a61bb5b08e37 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -62,45 +62,54 @@ inline std::string demangle(std::string name) { return name; } #endif struct EnforceNotMet : public std::exception { - std::exception_ptr exp_; std::string err_str_; - EnforceNotMet(std::exception_ptr e, const char* f, int l) : exp_(e) { - static constexpr int TRACE_STACK_LIMIT = 100; + EnforceNotMet(std::exception_ptr e, const char* f, int l) { try { - std::rethrow_exception(exp_); - } catch (const std::exception& exp) { - std::ostringstream sout; + std::rethrow_exception(e); + } catch (std::exception& e) { + Init(e.what(), f, l); + } + } - sout << string::Sprintf("%s at [%s:%d]", exp.what(), f, l) << std::endl; - sout << "PaddlePaddle Call Stacks: " << std::endl; + template + EnforceNotMet(const char* f, int l, ARGS... args) { + Init(string::Sprintf(args...), f, l); + } + + const char* what() const noexcept override { return err_str_.c_str(); } + + private: + template + inline void Init(StrType what, const char* f, int l) { + static constexpr int TRACE_STACK_LIMIT = 100; + std::ostringstream sout; + + sout << string::Sprintf("%s at [%s:%d]", what, f, l) << std::endl; + sout << "PaddlePaddle Call Stacks: " << std::endl; #if !defined(_WIN32) - void* call_stack[TRACE_STACK_LIMIT]; - auto size = backtrace(call_stack, TRACE_STACK_LIMIT); - auto symbols = backtrace_symbols(call_stack, size); - - Dl_info info; - for (int i = 0; i < size; ++i) { - if (dladdr(call_stack[i], &info) && info.dli_sname) { - auto demangled = demangle(info.dli_sname); - auto addr_offset = static_cast(call_stack[i]) - - static_cast(info.dli_saddr); - sout << string::Sprintf("%-3d %*0p %s + %zd\n", i, - 2 + sizeof(void*) * 2, call_stack[i], - demangled, addr_offset); - } else { - sout << string::Sprintf("%-3d %*0p\n", i, 2 + sizeof(void*) * 2, - call_stack[i]); - } + void* call_stack[TRACE_STACK_LIMIT]; + auto size = backtrace(call_stack, TRACE_STACK_LIMIT); + auto symbols = backtrace_symbols(call_stack, size); + Dl_info info; + for (int i = 0; i < size; ++i) { + if (dladdr(call_stack[i], &info) && info.dli_sname) { + auto demangled = demangle(info.dli_sname); + auto addr_offset = static_cast(call_stack[i]) - + static_cast(info.dli_saddr); + sout << string::Sprintf("%-3d %*0p %s + %zd\n", i, + 2 + sizeof(void*) * 2, call_stack[i], demangled, + addr_offset); + } else { + sout << string::Sprintf("%-3d %*0p\n", i, 2 + sizeof(void*) * 2, + call_stack[i]); } - free(symbols); + } + free(symbols); #else - sout << "Windows not support stack backtrace yet."; + sout << "Windows not support stack backtrace yet."; #endif - err_str_ = sout.str(); - } + err_str_ = sout.str(); } - - const char* what() const noexcept { return err_str_.c_str(); } }; struct EOFException : public std::exception { @@ -242,13 +251,8 @@ inline void throw_on_error(T e) { throw_on_error(e, ""); } -#define PADDLE_THROW(...) \ - do { \ - throw ::paddle::platform::EnforceNotMet( \ - std::make_exception_ptr( \ - std::runtime_error(paddle::string::Sprintf(__VA_ARGS__))), \ - __FILE__, __LINE__); \ - } while (false) +#define PADDLE_THROW(...) \ + throw ::paddle::platform::EnforceNotMet(__FILE__, __LINE__, __VA_ARGS__) #ifndef REPLACE_ENFORCE_GLOG #define PADDLE_ENFORCE(...) \ diff --git a/paddle/fluid/platform/float16.h b/paddle/fluid/platform/float16.h index 9d48557caf75f3571ead3df43a1a93cf65e4b8cb..98afe843c0035ec14ad874508dc02b8d1d3d359c 100644 --- a/paddle/fluid/platform/float16.h +++ b/paddle/fluid/platform/float16.h @@ -71,9 +71,6 @@ struct float16; } // namespace platform } // namespace paddle -// NOTE(): -// Do not move the eigen.h header, otherwise the eigen_vector will failed. -#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/platform/hostdevice.h" #include "unsupported/Eigen/CXX11/Tensor" diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index dca0c01ab229ce8c1f8578ad489ef927e15a1068..74b4f2e937b3d3715b13b03e8d3618c0afafb69c 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -336,6 +336,8 @@ PYBIND11_MODULE(core, m) { .def("get_tensor", [](SelectedRows &self) { return self.mutable_value(); }, py::return_value_policy::reference) + .def("numel", + [](SelectedRows &self) -> int64_t { return self.value().numel(); }) .def("set_height", &SelectedRows::set_height) .def("height", &SelectedRows::height) .def("set_rows", diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index dc1d152a093b1df4f1d6b76f83b00fe827016ae9..e0bb0d1152b258f9b16bb9063acf0ac6012d3432 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -128,7 +128,7 @@ def __bootstrap__(): 'free_idle_memory', 'paddle_num_threads', "dist_threadpool_size", 'eager_delete_tensor_gb', 'fast_eager_deletion_mode', 'allocator_strategy', 'reader_queue_speed_test_mode', - 'print_sub_graph_dir' + 'print_sub_graph_dir', 'pe_profile_fname' ] if 'Darwin' not in sysstr: read_env_flags.append('use_pinned_memory') diff --git a/python/paddle/fluid/average.py b/python/paddle/fluid/average.py index 42cd3b36420ef5a17a9a7d981978ba8869809936..40a734af311e2037c1816dce97db123ebedd2f4f 100644 --- a/python/paddle/fluid/average.py +++ b/python/paddle/fluid/average.py @@ -48,6 +48,7 @@ class WeightedAverage(object): Examples: .. code-block:: python + avg = fluid.average.WeightedAverage() avg.add(value=2.0, weight=1) avg.add(value=4.0, weight=2) diff --git a/python/paddle/fluid/tests/unittests/test_split_selected_rows_op.py b/python/paddle/fluid/tests/unittests/test_split_selected_rows_op.py index 50204b8a77c187aa695da83860960566448d290f..f8847e1570dc47d432777faa15f4004f1a7111a6 100644 --- a/python/paddle/fluid/tests/unittests/test_split_selected_rows_op.py +++ b/python/paddle/fluid/tests/unittests/test_split_selected_rows_op.py @@ -63,6 +63,7 @@ class TestSpliteSelectedRows(unittest.TestCase): # expected output selected rows expected_out0_rows = [0, 4] expected_out1_rows = [0, 2] + expected_out2_rows = [] expected_out4_rows = [0] op = Operator( @@ -75,6 +76,7 @@ class TestSpliteSelectedRows(unittest.TestCase): self.assertEqual(outs[0].rows(), expected_out0_rows) self.assertEqual(outs[1].rows(), expected_out1_rows) + self.assertEqual(outs[2].rows(), expected_out2_rows) self.assertEqual(outs[4].rows(), expected_out4_rows) self.assertEqual(outs[0].height(), height_sections[0]) @@ -84,6 +86,9 @@ class TestSpliteSelectedRows(unittest.TestCase): self.assertAlmostEqual(4.0, np.array(outs[1].get_tensor())[1, 1]) self.assertAlmostEqual(8.0, np.array(outs[4].get_tensor())[0, 1]) + self.assertEqual(outs[2].numel(), 0) + self.assertEqual(outs[3].numel(), 0) + def check_grad_with_place(self, place): scope = core.Scope() height = 10