From 83f4e9e9a6c33c6060996643479b13cfff669430 Mon Sep 17 00:00:00 2001 From: Houjiang Chen Date: Fri, 25 May 2018 07:34:56 -0500 Subject: [PATCH] enable eigen multi-threads on mobile device (#10938) --- CMakeLists.txt | 1 + cmake/configure.cmake | 4 ++ paddle/function/EigenGemm.cpp | 17 +++---- paddle/function/EigenThreadDevice.h | 73 +++++++++++++++++++++++++++++ 4 files changed, 87 insertions(+), 8 deletions(-) create mode 100644 paddle/function/EigenThreadDevice.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 93f875757..cfaab206e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,6 +57,7 @@ option(GLIDE_INSTALL "Download and install go dependencies " ON) option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF) option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF) option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF) +option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF) option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF) option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF) option(WITH_CONTRIB "Compile the third-party contributation" OFF) diff --git a/cmake/configure.cmake b/cmake/configure.cmake index e490397cc..682614742 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -41,6 +41,10 @@ if(USE_EIGEN_FOR_BLAS) add_definitions(-DPADDLE_USE_EIGEN_FOR_BLAS) endif(USE_EIGEN_FOR_BLAS) +if(EIGEN_USE_THREADS) + add_definitions(-DEIGEN_USE_THREADS) +endif(EIGEN_USE_THREADS) + if(NOT WITH_PROFILER) add_definitions(-DPADDLE_DISABLE_PROFILER) endif(NOT WITH_PROFILER) diff --git a/paddle/function/EigenGemm.cpp b/paddle/function/EigenGemm.cpp index bac4659e6..8e9dbbd7a 100644 --- a/paddle/function/EigenGemm.cpp +++ b/paddle/function/EigenGemm.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include -#include "unsupported/Eigen/CXX11/Tensor" +#include "paddle/function/EigenThreadDevice.h" namespace paddle { @@ -70,25 +70,26 @@ struct EigenBlasGemm { dims[0].first = transA ? 0 : 1; dims[0].second = transB ? 1 : 0; - Eigen::DefaultDevice device; + auto* device = EigenDeviceWarpper::device(); if (N == ldc) { if (alpha == T(1) && beta == T(0)) { - c.device(device) = a.contract(b, dims); + c.device(*device) = a.contract(b, dims); } else if (alpha == T(1) && beta == T(1)) { - c.device(device) += a.contract(b, dims); + c.device(*device) += a.contract(b, dims); } else { - c.device(device) = alpha * a.contract(b, dims) + beta * c; + c.device(*device) = alpha * a.contract(b, dims) + beta * c; } } else { if (alpha == T(1) && beta == T(0)) { - c.slice(offsetC, extentC).device(device) = a.contract(b, dims); + c.slice(offsetC, extentC).device(*device) = a.contract(b, dims); } else if (alpha == T(1) && beta == T(1)) { - c.slice(offsetC, extentC).device(device) += a.contract(b, dims); + c.slice(offsetC, extentC).device(*device) += a.contract(b, dims); } else { - c.slice(offsetC, extentC).device(device) = + c.slice(offsetC, extentC).device(*device) = alpha * a.contract(b, dims) + beta * c.slice(offsetC, extentC); } } + EigenDeviceWarpper::free_device(device); } }; diff --git a/paddle/function/EigenThreadDevice.h b/paddle/function/EigenThreadDevice.h new file mode 100644 index 000000000..74269aa66 --- /dev/null +++ b/paddle/function/EigenThreadDevice.h @@ -0,0 +1,73 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#pragma once
+
+#if defined(__ANDROID__)
+#include <cstdio>  // fopen, fscanf, fclose
+#endif
+#if defined(__OSX__) || defined(__APPLE__)
+#include <sys/types.h>   // size_t
+#include <sys/sysctl.h>  // sysctlbyname
+#endif
+#include "unsupported/Eigen/CXX11/Tensor"
+
+namespace paddle {
+
+// GetCpuCount() is defined in this header, so every variant is `inline`:
+// that keeps the header usable from more than one translation unit.
+#if defined(__ANDROID__)
+/// Returns the CPU count read from /sys/devices/system/cpu/possible.
+///
+/// The file is parsed as a "first-last" index range and `last + 1` is
+/// returned. Returns 1 when the file cannot be opened or does not contain
+/// such a range.
+inline int GetCpuCount() {
+  FILE* fp = fopen("/sys/devices/system/cpu/possible", "r");
+  if (fp == nullptr) {
+    return 1;
+  }
+  int first = 0;
+  int last = 0;
+  const int matched = fscanf(fp, "%d-%d", &first, &last);
+  fclose(fp);
+  // A lone index such as "0" fills only one field, i.e. a single CPU.
+  // Rejecting a negative upper index keeps the result >= 1.
+  return (matched == 2 && last >= 0) ? last + 1 : 1;
+}
+#elif defined(__OSX__) || defined(__APPLE__)
+/// Returns the value of the "hw.ncpu" sysctl, or 1 if it is not positive.
+inline int GetCpuCount() {
+  int count = 0;
+  size_t len = sizeof(count);
+  // If the query fails, count keeps its initial 0 and the fallback applies.
+  sysctlbyname("hw.ncpu", &count, &len, nullptr, 0);
+  return count > 0 ? count : 1;
+}
+#else
+/// Other platforms always report a single CPU.
+inline int GetCpuCount() { return 1; }
+#endif
+
+/// Hands out the Eigen device that tensor expressions are evaluated on.
+///
+/// When EIGEN_USE_THREADS is defined the device is a thread-pool device,
+/// otherwise it is Eigen's DefaultDevice. In both configurations the device
+/// is one shared instance that is created on first use; callers pair every
+/// device() call with free_device(), which releases nothing.
+class EigenDeviceWarpper {
+public:  // NOLINT
+// `defined(...)` rather than a bare `#if`, so that a value-less
+// `#define EIGEN_USE_THREADS` selects this branch instead of failing to
+// preprocess.
+#if defined(EIGEN_USE_THREADS)
+  /// Returns the shared thread-pool device.
+  static Eigen::ThreadPoolDevice* device() {
+    // Function-local statics: the CPU count is queried and the pool is
+    // built only on the first call, not on every call.
+    static const int num_threads = ThreadCount();
+    static Eigen::ThreadPool pool(num_threads);
+    static Eigen::ThreadPoolDevice shared(&pool, num_threads);
+    return &shared;
+  }
+
+  /// No-op: the thread-pool device is shared and is never released here.
+  static void free_device(Eigen::ThreadPoolDevice* /*device*/) {}
+
+private:  // NOLINT
+  /// Number of pool threads: the CPU count, capped at two.
+  static int ThreadCount() {
+    const int max_threads = 2;
+    const int num_cpus = GetCpuCount();
+    return (num_cpus > max_threads) ? max_threads : num_cpus;
+  }
+#else
+  /// Returns the shared single-threaded default device.
+  static Eigen::DefaultDevice* device() {
+    // DefaultDevice holds no state, so one shared instance replaces a heap
+    // allocation on every call.
+    static Eigen::DefaultDevice shared;
+    return &shared;
+  }
+
+  /// No-op: the default device is a shared static and must not be deleted.
+  static void free_device(Eigen::DefaultDevice* /*device*/) {}
+#endif
+};
+
+}  // namespace paddle
-- 
GitLab