diff --git a/CMakeLists.txt b/CMakeLists.txt index 93f8757571ddcd717f25bb0f710be5462803af98..cfaab206e1f321a55119d4a8d65c4a99d3819fff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,6 +57,7 @@ option(GLIDE_INSTALL "Download and install go dependencies " ON) option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF) option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF) option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF) +option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF) option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF) option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF) option(WITH_CONTRIB "Compile the third-party contributation" OFF) diff --git a/cmake/configure.cmake b/cmake/configure.cmake index e490397cc0624c310949a4b571bd00cac6e8953b..682614742cf1bd3130c638020a2545e16226d4d6 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -41,6 +41,10 @@ if(USE_EIGEN_FOR_BLAS) add_definitions(-DPADDLE_USE_EIGEN_FOR_BLAS) endif(USE_EIGEN_FOR_BLAS) +if(EIGEN_USE_THREADS) + add_definitions(-DEIGEN_USE_THREADS) +endif(EIGEN_USE_THREADS) + if(NOT WITH_PROFILER) add_definitions(-DPADDLE_DISABLE_PROFILER) endif(NOT WITH_PROFILER) diff --git a/paddle/function/EigenGemm.cpp b/paddle/function/EigenGemm.cpp index bac4659e62b107dd80ef95dd0907b3da4becffbc..8e9dbbd7a154095a7298bb2f59a82d13a60f9bd3 100644 --- a/paddle/function/EigenGemm.cpp +++ b/paddle/function/EigenGemm.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include "unsupported/Eigen/CXX11/Tensor" +#include "paddle/function/EigenThreadDevice.h" namespace paddle { @@ -70,25 +70,26 @@ struct EigenBlasGemm { dims[0].first = transA ? 0 : 1; dims[0].second = transB ? 1 : 0; - Eigen::DefaultDevice device; + auto* device = EigenDeviceWarpper::device(); if (N == ldc) { if (alpha == T(1) && beta == T(0)) { - c.device(device) = a.contract(b, dims); + c.device(*device) = a.contract(b, dims); } else if (alpha == T(1) && beta == T(1)) { - c.device(device) += a.contract(b, dims); + c.device(*device) += a.contract(b, dims); } else { - c.device(device) = alpha * a.contract(b, dims) + beta * c; + c.device(*device) = alpha * a.contract(b, dims) + beta * c; } } else { if (alpha == T(1) && beta == T(0)) { - c.slice(offsetC, extentC).device(device) = a.contract(b, dims); + c.slice(offsetC, extentC).device(*device) = a.contract(b, dims); } else if (alpha == T(1) && beta == T(1)) { - c.slice(offsetC, extentC).device(device) += a.contract(b, dims); + c.slice(offsetC, extentC).device(*device) += a.contract(b, dims); } else { - c.slice(offsetC, extentC).device(device) = + c.slice(offsetC, extentC).device(*device) = alpha * a.contract(b, dims) + beta * c.slice(offsetC, extentC); } } + EigenDeviceWarpper::free_device(device); } }; diff --git a/paddle/function/EigenThreadDevice.h b/paddle/function/EigenThreadDevice.h new file mode 100644 index 0000000000000000000000000000000000000000..74269aa664a711c905e12a61958c9ab01e2340c0 --- /dev/null +++ b/paddle/function/EigenThreadDevice.h @@ -0,0 +1,73 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#pragma once + +#if defined(__OSX__) || defined(__APPLE__) +#include +#include +#endif +#include "unsupported/Eigen/CXX11/Tensor" + +namespace paddle { + +#if defined(__ANDROID__) +int GetCpuCount() { + FILE* fp = fopen("/sys/devices/system/cpu/possible", "r"); + if (!fp) { + return 1; + } + int rank0, rank1; + int num = fscanf(fp, "%d-%d", &rank0, &rank1); + fclose(fp); + if (num < 2) return 1; + return rank1 + 1; +} +#elif defined(__OSX__) || defined(__APPLE__) +int GetCpuCount() { + int count = 0; + size_t len = sizeof(int); + sysctlbyname("hw.ncpu", &count, &len, NULL, 0); + return count > 0 ? count : 1; +} +#else +int GetCpuCount() { return 1; } +#endif + +class EigenDeviceWarpper { +public: // NOLINT +#if EIGEN_USE_THREADS + static Eigen::ThreadPoolDevice* device() { + const int num_cpus = GetCpuCount(); + const int num_threads = (num_cpus > 2) ? 2 : num_cpus; + static Eigen::ThreadPool tp(num_threads); + static Eigen::ThreadPoolDevice* device = + new Eigen::ThreadPoolDevice(&tp, num_threads); + return device; + } + + static void free_device(Eigen::ThreadPoolDevice* device) { + // do nothing + } +#else + static Eigen::DefaultDevice* device() { + Eigen::DefaultDevice* device = new Eigen::DefaultDevice; + return device; + } + + static void free_device(Eigen::DefaultDevice* device) { delete device; } +#endif +}; + +} // namespace paddle