提交 83f4e9e9 编写于 作者: H Houjiang Chen 提交者: qingqing01

enable eigen multi-threads on mobile device (#10938)

上级 0930646b
......@@ -57,6 +57,7 @@ option(GLIDE_INSTALL "Download and install go dependencies " ON)
option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
option(WITH_DISTRIBUTE "Compile with grpc distributed support" OFF)
option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
# Opt-in switch (default OFF) for building against Eigen's multi-threaded
# code path. When ON, the build adds the EIGEN_USE_THREADS compile definition.
option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF)
option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF)
option(WITH_CONTRIB "Compile the third-party contributation" OFF)
......
......@@ -41,6 +41,10 @@ if(USE_EIGEN_FOR_BLAS)
add_definitions(-DPADDLE_USE_EIGEN_FOR_BLAS)
endif(USE_EIGEN_FOR_BLAS)
# Forward the EIGEN_USE_THREADS option to the compiler as a preprocessor
# definition so C++ sources can select the thread-pool code path.
if(EIGEN_USE_THREADS)
add_definitions(-DEIGEN_USE_THREADS)
endif(EIGEN_USE_THREADS)
if(NOT WITH_PROFILER)
add_definitions(-DPADDLE_DISABLE_PROFILER)
endif(NOT WITH_PROFILER)
......
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include "unsupported/Eigen/CXX11/Tensor"
#include "paddle/function/EigenThreadDevice.h"
namespace paddle {
......@@ -70,25 +70,26 @@ struct EigenBlasGemm {
dims[0].first = transA ? 0 : 1;
dims[0].second = transB ? 1 : 0;
Eigen::DefaultDevice device;
auto* device = EigenDeviceWarpper::device();
if (N == ldc) {
if (alpha == T(1) && beta == T(0)) {
c.device(device) = a.contract(b, dims);
c.device(*device) = a.contract(b, dims);
} else if (alpha == T(1) && beta == T(1)) {
c.device(device) += a.contract(b, dims);
c.device(*device) += a.contract(b, dims);
} else {
c.device(device) = alpha * a.contract(b, dims) + beta * c;
c.device(*device) = alpha * a.contract(b, dims) + beta * c;
}
} else {
if (alpha == T(1) && beta == T(0)) {
c.slice(offsetC, extentC).device(device) = a.contract(b, dims);
c.slice(offsetC, extentC).device(*device) = a.contract(b, dims);
} else if (alpha == T(1) && beta == T(1)) {
c.slice(offsetC, extentC).device(device) += a.contract(b, dims);
c.slice(offsetC, extentC).device(*device) += a.contract(b, dims);
} else {
c.slice(offsetC, extentC).device(device) =
c.slice(offsetC, extentC).device(*device) =
alpha * a.contract(b, dims) + beta * c.slice(offsetC, extentC);
}
}
EigenDeviceWarpper::free_device(device);
}
};
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#pragma once
#if defined(__OSX__) || defined(__APPLE__)
#include <sys/sysctl.h>
#include <sys/types.h>
#endif
#include "unsupported/Eigen/CXX11/Tensor"
namespace paddle {
// Returns the number of CPUs reported by the platform, or 1 when the count
// cannot be determined. The result is always >= 1.
//
// Every variant is declared `inline`: this header defines the function body,
// so a non-inline definition would be emitted in each translation unit that
// includes the header and fail at link time with duplicate symbols.
#if defined(__ANDROID__)
inline int GetCpuCount() {
  // The sysfs entry is parsed as "<first>-<last>"; the CPU count is taken to
  // be <last> + 1.
  FILE* fp = fopen("/sys/devices/system/cpu/possible", "r");
  if (fp == nullptr) {
    return 1;
  }
  int first = 0;
  int last = 0;
  const int matched = fscanf(fp, "%d-%d", &first, &last);
  fclose(fp);
  // Fall back to a single CPU when the range could not be parsed in full
  // (for example the file holds a single index) or the bound is nonsensical.
  if (matched < 2 || last < 0) {
    return 1;
  }
  return last + 1;
}
#elif defined(__OSX__) || defined(__APPLE__)
inline int GetCpuCount() {
  int count = 0;
  size_t len = sizeof(count);
  // A failed query is treated the same as a non-positive answer.
  if (sysctlbyname("hw.ncpu", &count, &len, nullptr, 0) != 0) {
    return 1;
  }
  return count > 0 ? count : 1;
}
#else
// No platform-specific probe is implemented here; report a single CPU.
inline int GetCpuCount() { return 1; }
#endif
// Hands out the Eigen device used to evaluate tensor expressions.
//
// Usage contract (unchanged): obtain a pointer with device(), evaluate with
// `expr.device(*ptr)`, then pass the pointer back to free_device(). Callers
// must not delete the pointer themselves.
//
// The threaded variant is selected whenever EIGEN_USE_THREADS is defined.
// `defined(...)` is used rather than `#if EIGEN_USE_THREADS` so that a
// value-less `#define EIGEN_USE_THREADS` does not produce a preprocessor
// error.
class EigenDeviceWarpper {
public:  // NOLINT
#if defined(EIGEN_USE_THREADS)
  // Returns the process-wide thread-pool device, created on first use.
  // The pool uses at most two threads (fewer if fewer CPUs are reported).
  static Eigen::ThreadPoolDevice* device() {
    // All of these are function-local statics so the CPU probe (file I/O on
    // Android) runs once instead of on every call.
    static const int num_cpus = GetCpuCount();
    static const int num_threads = (num_cpus > 2) ? 2 : num_cpus;
    static Eigen::ThreadPool tp(num_threads);
    // Allocated once and never deleted in this class; free_device() is a
    // no-op for the shared device.
    static Eigen::ThreadPoolDevice* device =
        new Eigen::ThreadPoolDevice(&tp, num_threads);
    return device;
  }

  // The thread-pool device is shared, so there is nothing to release.
  static void free_device(Eigen::ThreadPoolDevice* /*device*/) {
    // do nothing
  }
#else
  // Returns a shared single-threaded device. A single static instance is
  // reused to avoid a heap allocation and deallocation around every call.
  // NOTE(review): assumes Eigen::DefaultDevice carries no per-use state, so
  // sharing one instance is safe -- confirm against the Eigen version in use.
  static Eigen::DefaultDevice* device() {
    static Eigen::DefaultDevice shared_device;
    return &shared_device;
  }

  // The shared device has static storage; nothing to release.
  static void free_device(Eigen::DefaultDevice* /*device*/) {
    // do nothing
  }
#endif
};
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册