提交 b7bf5f08 编写于 作者: xiebaiyuan

Merge remote-tracking branch 'upstream/develop' into develop

cmake_minimum_required(VERSION 3.6) cmake_minimum_required(VERSION 3.6)
option(USE_OPENMP "openmp support" ON)
project(paddle-mobile) project(paddle-mobile)
option(DEBUGING "enable debug mode" OFF) option(DEBUGING "enable debug mode" OFF)
option(USE_OPENMP "openmp support" OFF)
option(USE_EXCEPTION "use std exception" OFF) option(USE_EXCEPTION "use std exception" OFF)
option(LOG_PROFILE "log profile" OFF) option(LOG_PROFILE "log profile" OFF)
# select the platform to build # select the platform to build
...@@ -149,7 +150,17 @@ if (ANDROID_NDK_TOOLCHAIN_INCLUDED) ...@@ -149,7 +150,17 @@ if (ANDROID_NDK_TOOLCHAIN_INCLUDED)
list(REMOVE_DUPLICATES CMAKE_CXX_FLAGS) list(REMOVE_DUPLICATES CMAKE_CXX_FLAGS)
add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H}) add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
elseif(IS_IOS) elseif(IS_IOS)
add_library(paddle-mobile STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H}) if(USE_OPENMP)
add_library(paddle-mobile-stage0 STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
add_custom_target(paddle-mobile ALL
COMMAND libtool -static -o ${CMAKE_BINARY_DIR}/libpaddle-mobile.a ${CMAKE_CURRENT_LIST_DIR}/tools/libomp.a $<TARGET_FILE:paddle-mobile-stage0>
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
DEPENDS paddle-mobile
)
add_dependencies(paddle-mobile paddle-mobile-stage0)
else()
add_library(paddle-mobile STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
endif()
else () else ()
add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H}) add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
endif () endif ()
......
...@@ -58,6 +58,7 @@ static std::mutex shared_mutex; ...@@ -58,6 +58,7 @@ static std::mutex shared_mutex;
- (BOOL)load:(NSString *)modelPath andWeightsPath:(NSString *)weighsPath{ - (BOOL)load:(NSString *)modelPath andWeightsPath:(NSString *)weighsPath{
std::string model_path_str = std::string([modelPath UTF8String]); std::string model_path_str = std::string([modelPath UTF8String]);
std::string weights_path_str = std::string([weighsPath UTF8String]); std::string weights_path_str = std::string([weighsPath UTF8String]);
pam_->SetThreadNum(2);
if (loaded_ = pam_->Load(model_path_str, weights_path_str, true)) { if (loaded_ = pam_->Load(model_path_str, weights_path_str, true)) {
return YES; return YES;
} else { } else {
......
...@@ -3002,14 +3002,21 @@ void Sgemm(int m, int n, int k, float alpha, const float *A, int lda, ...@@ -3002,14 +3002,21 @@ void Sgemm(int m, int n, int k, float alpha, const float *A, int lda,
NC = L2 / (KC * sizeof(float)); NC = L2 / (KC * sizeof(float));
// make sure MC is multiple of MR, and NC is multiple of NR // make sure MC is multiple of MR, and NC is multiple of NR
int mblock_num = (m + MC - 1) / MC; if (MC == 0) {
MC = (m + mblock_num - 1) / mblock_num; MC = MR;
MC = (MC + MR - 1) / MR * MR; } else {
int mblock_num = (m + MC - 1) / MC;
MC = (m + mblock_num - 1) / mblock_num;
MC = (MC + MR - 1) / MR * MR;
}
// DLOG << "mblock_num = " << mblock_num << ", MC = " << MC << "\n"; // DLOG << "mblock_num = " << mblock_num << ", MC = " << MC << "\n";
if (NC == 0) {
int nblock_num = (n + NC - 1) / NC; NC = NR;
NC = (n + nblock_num - 1) / nblock_num; } else {
NC = (NC + NR - 1) / NR * NR; int nblock_num = (n + NC - 1) / NC;
NC = (n + nblock_num - 1) / nblock_num;
NC = (NC + NR - 1) / NR * NR;
}
// DLOG << "nblock_num = " << nblock_num << ", NC = " << NC << "\n"; // DLOG << "nblock_num = " << nblock_num << ", NC = " << NC << "\n";
packedA = static_cast<float *>( packedA = static_cast<float *>(
...@@ -3067,14 +3074,21 @@ void SgemmWithBn(int m, int n, int k, float alpha, const float *A, int lda, ...@@ -3067,14 +3074,21 @@ void SgemmWithBn(int m, int n, int k, float alpha, const float *A, int lda,
NC = L2 / (KC * sizeof(float)); NC = L2 / (KC * sizeof(float));
// make sure MC is multiple of MR, and NC is multiple of NR // make sure MC is multiple of MR, and NC is multiple of NR
int mblock_num = (m + MC - 1) / MC; if (MC == 0) {
MC = (m + mblock_num - 1) / mblock_num; MC = MR;
MC = (MC + MR - 1) / MR * MR; } else {
int mblock_num = (m + MC - 1) / MC;
MC = (m + mblock_num - 1) / mblock_num;
MC = (MC + MR - 1) / MR * MR;
}
// DLOG << "mblock_num = " << mblock_num << ", MC = " << MC << "\n"; // DLOG << "mblock_num = " << mblock_num << ", MC = " << MC << "\n";
if (NC == 0) {
int nblock_num = (n + NC - 1) / NC; NC = NR;
NC = (n + nblock_num - 1) / nblock_num; } else {
NC = (NC + NR - 1) / NR * NR; int nblock_num = (n + NC - 1) / NC;
NC = (n + nblock_num - 1) / nblock_num;
NC = (NC + NR - 1) / NR * NR;
}
// DLOG << "nblock_num = " << nblock_num << ", NC = " << NC << "\n"; // DLOG << "nblock_num = " << nblock_num << ", NC = " << NC << "\n";
packedA = static_cast<float *>( packedA = static_cast<float *>(
...@@ -3133,14 +3147,21 @@ void SgemmWithPRelu(int m, int n, int k, const float *A, int lda, ...@@ -3133,14 +3147,21 @@ void SgemmWithPRelu(int m, int n, int k, const float *A, int lda,
NC = L2 / (KC * sizeof(float)); NC = L2 / (KC * sizeof(float));
// make sure MC is multiple of MR, and NC is multiple of NR // make sure MC is multiple of MR, and NC is multiple of NR
int mblock_num = (m + MC - 1) / MC; if (MC == 0) {
MC = (m + mblock_num - 1) / mblock_num; MC = MR;
MC = (MC + MR - 1) / MR * MR; } else {
int mblock_num = (m + MC - 1) / MC;
MC = (m + mblock_num - 1) / mblock_num;
MC = (MC + MR - 1) / MR * MR;
}
// DLOG << "mblock_num = " << mblock_num << ", MC = " << MC << "\n"; // DLOG << "mblock_num = " << mblock_num << ", MC = " << MC << "\n";
if (NC == 0) {
int nblock_num = (n + NC - 1) / NC; NC = NR;
NC = (n + nblock_num - 1) / nblock_num; } else {
NC = (NC + NR - 1) / NR * NR; int nblock_num = (n + NC - 1) / NC;
NC = (n + nblock_num - 1) / nblock_num;
NC = (NC + NR - 1) / NR * NR;
}
// DLOG << "nblock_num = " << nblock_num << ", NC = " << NC << "\n"; // DLOG << "nblock_num = " << nblock_num << ", NC = " << NC << "\n";
packedA = static_cast<float *>( packedA = static_cast<float *>(
...@@ -3203,9 +3224,13 @@ void Sgemm_omp(int m, int n, int k, float alpha, const float *A, int lda, ...@@ -3203,9 +3224,13 @@ void Sgemm_omp(int m, int n, int k, float alpha, const float *A, int lda,
if (m > n) { if (m > n) {
// 对 A 分块 // 对 A 分块
MC = L1 / (KC * sizeof(float)); MC = L1 / (KC * sizeof(float));
int mblock_num = (m + MC - 1) / MC; if (MC == 0) {
MC = (m + mblock_num - 1) / mblock_num; MC = MR;
MC = (MC + MR - 1) / MR * MR; } else {
int mblock_num = (m + MC - 1) / MC;
MC = (m + mblock_num - 1) / mblock_num;
MC = (MC + MR - 1) / MR * MR;
}
// 补齐 B // 补齐 B
NC = (n + NR - 1) / NR * NR; NC = (n + NR - 1) / NR * NR;
...@@ -3227,9 +3252,13 @@ void Sgemm_omp(int m, int n, int k, float alpha, const float *A, int lda, ...@@ -3227,9 +3252,13 @@ void Sgemm_omp(int m, int n, int k, float alpha, const float *A, int lda,
} else { } else {
// 对 B 分块 // 对 B 分块
NC = L1 / (KC * sizeof(float)); NC = L1 / (KC * sizeof(float));
int nblock_num = (n + NC - 1) / NC; if (NC == 0) {
NC = (n + nblock_num - 1) / nblock_num; NC = NR;
NC = (NC + NR - 1) / NR * NR; } else {
int nblock_num = (n + NC - 1) / NC;
NC = (n + nblock_num - 1) / nblock_num;
NC = (NC + NR - 1) / NR * NR;
}
// 补齐 A // 补齐 A
MC = (m + MR - 1) / MR * MR; MC = (m + MR - 1) / MR * MR;
...@@ -3311,9 +3340,13 @@ void SgemmWithBn_omp(int m, int n, int k, float alpha, const float *A, int lda, ...@@ -3311,9 +3340,13 @@ void SgemmWithBn_omp(int m, int n, int k, float alpha, const float *A, int lda,
if (m > n) { if (m > n) {
// 对 A 分块 // 对 A 分块
MC = L1 / (KC * sizeof(float)); MC = L1 / (KC * sizeof(float));
int mblock_num = (m + MC - 1) / MC; if (MC == 0) {
MC = (m + mblock_num - 1) / mblock_num; MC = MR;
MC = (MC + MR - 1) / MR * MR; } else {
int mblock_num = (m + MC - 1) / MC;
MC = (m + mblock_num - 1) / mblock_num;
MC = (MC + MR - 1) / MR * MR;
}
// 补齐 B // 补齐 B
NC = (n + NR - 1) / NR * NR; NC = (n + NR - 1) / NR * NR;
...@@ -3335,9 +3368,13 @@ void SgemmWithBn_omp(int m, int n, int k, float alpha, const float *A, int lda, ...@@ -3335,9 +3368,13 @@ void SgemmWithBn_omp(int m, int n, int k, float alpha, const float *A, int lda,
} else { } else {
// 对 B 分块 // 对 B 分块
NC = L1 / (KC * sizeof(float)); NC = L1 / (KC * sizeof(float));
int nblock_num = (n + NC - 1) / NC; if (NC == 0) {
NC = (n + nblock_num - 1) / nblock_num; NC = NR;
NC = (NC + NR - 1) / NR * NR; } else {
int nblock_num = (n + NC - 1) / NC;
NC = (n + nblock_num - 1) / nblock_num;
NC = (NC + NR - 1) / NR * NR;
}
// 补齐 A // 补齐 A
MC = (m + MR - 1) / MR * MR; MC = (m + MR - 1) / MR * MR;
...@@ -3430,9 +3467,13 @@ void SgemmWithPRelu_omp(int m, int n, int k, const float *A, int lda, ...@@ -3430,9 +3467,13 @@ void SgemmWithPRelu_omp(int m, int n, int k, const float *A, int lda,
if (m > n) { if (m > n) {
// 对 A 分块 // 对 A 分块
MC = L1 / (KC * sizeof(float)); MC = L1 / (KC * sizeof(float));
int mblock_num = (m + MC - 1) / MC; if (MC == 0) {
MC = (m + mblock_num - 1) / mblock_num; MC = MR;
MC = (MC + MR - 1) / MR * MR; } else {
int mblock_num = (m + MC - 1) / MC;
MC = (m + mblock_num - 1) / mblock_num;
MC = (MC + MR - 1) / MR * MR;
}
// 补齐 B // 补齐 B
NC = (n + NR - 1) / NR * NR; NC = (n + NR - 1) / NR * NR;
...@@ -3454,9 +3495,13 @@ void SgemmWithPRelu_omp(int m, int n, int k, const float *A, int lda, ...@@ -3454,9 +3495,13 @@ void SgemmWithPRelu_omp(int m, int n, int k, const float *A, int lda,
} else { } else {
// 对 B 分块 // 对 B 分块
NC = L1 / (KC * sizeof(float)); NC = L1 / (KC * sizeof(float));
int nblock_num = (n + NC - 1) / NC; if (NC == 0) {
NC = (n + nblock_num - 1) / nblock_num; NC = NR;
NC = (NC + NR - 1) / NR * NR; } else {
int nblock_num = (n + NC - 1) / NC;
NC = (n + nblock_num - 1) / nblock_num;
NC = (NC + NR - 1) / NR * NR;
}
// 补齐 A // 补齐 A
MC = (m + MR - 1) / MR * MR; MC = (m + MR - 1) / MR * MR;
......
...@@ -28,16 +28,9 @@ void matmul<float>(const framework::Tensor &matrix_a, bool trans_a, ...@@ -28,16 +28,9 @@ void matmul<float>(const framework::Tensor &matrix_a, bool trans_a,
auto dim_a = matrix_a.dims(); auto dim_a = matrix_a.dims();
auto dim_b = matrix_b.dims(); auto dim_b = matrix_b.dims();
auto dim_out = matrix_out->dims(); auto dim_out = matrix_out->dims();
// PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 && PADDLE_MOBILE_ENFORCE(
// dim_out.size() == dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
// 2, "The input and output of matmul be matrix");
// "The input and output of matmul be matrix");
//
// PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
// platform::is_cpu_place(matrix_b.place())
// &&
// platform::is_cpu_place(matrix_out->place()),
// "Matrix must all be in CPUPlace");
int M = dim_out[0]; int M = dim_out[0];
int N = dim_out[1]; int N = dim_out[1];
...@@ -61,16 +54,9 @@ void matmulWithBn<float>(const framework::Tensor &matrix_a, bool trans_a, ...@@ -61,16 +54,9 @@ void matmulWithBn<float>(const framework::Tensor &matrix_a, bool trans_a,
auto dim_a = matrix_a.dims(); auto dim_a = matrix_a.dims();
auto dim_b = matrix_b.dims(); auto dim_b = matrix_b.dims();
auto dim_out = matrix_out->dims(); auto dim_out = matrix_out->dims();
// PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 && PADDLE_MOBILE_ENFORCE(
// dim_out.size() == dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
// 2, "The input and output of matmul be matrix");
// "The input and output of matmul be matrix");
//
// PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
// platform::is_cpu_place(matrix_b.place())
// &&
// platform::is_cpu_place(matrix_out->place()),
// "Matrix must all be in CPUPlace");
int M = dim_out[0]; int M = dim_out[0];
int N = dim_out[1]; int N = dim_out[1];
...@@ -95,16 +81,9 @@ void matmulWithPRelu(const framework::Tensor &matrix_a, bool trans_a, ...@@ -95,16 +81,9 @@ void matmulWithPRelu(const framework::Tensor &matrix_a, bool trans_a,
auto dim_a = matrix_a.dims(); auto dim_a = matrix_a.dims();
auto dim_b = matrix_b.dims(); auto dim_b = matrix_b.dims();
auto dim_out = matrix_out->dims(); auto dim_out = matrix_out->dims();
// PADDLE_ENFORCE(dim_a.size() == 2 && dim_b.size() == 2 && PADDLE_MOBILE_ENFORCE(
// dim_out.size() == dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
// 2, "The input and output of matmul be matrix");
// "The input and output of matmul be matrix");
//
// PADDLE_ENFORCE(platform::is_cpu_place(matrix_a.place()) &&
// platform::is_cpu_place(matrix_b.place())
// &&
// platform::is_cpu_place(matrix_out->place()),
// "Matrix must all be in CPUPlace");
int M = dim_out[0]; int M = dim_out[0];
int N = dim_out[1]; int N = dim_out[1];
......
...@@ -50,8 +50,13 @@ endif (CMAKE_UNAME) ...@@ -50,8 +50,13 @@ endif (CMAKE_UNAME)
#include (CMakeForceCompiler) #include (CMakeForceCompiler)
#CMAKE_C_COMPILER (/usr/bin/gcc) #CMAKE_C_COMPILER (/usr/bin/gcc)
#CMAKE_CXX_COMPILER (/usr/bin/g++) #CMAKE_CXX_COMPILER (/usr/bin/g++)
set(CMAKE_C_COMPILER /usr/bin/gcc) if(USE_OPENMP)
set(CMAKE_CXX_COMPILER /usr/bin/g++) set(CMAKE_C_COMPILER /usr/local/opt/llvm/bin/clang)
set(CMAKE_CXX_COMPILER /usr/local/opt/llvm/bin/clang++)
else()
set(CMAKE_C_COMPILER /usr/bin/gcc)
set(CMAKE_CXX_COMPILER /usr/bin/g++)
endif()
set(CMAKE_AR ar CACHE FILEPATH "" FORCE) set(CMAKE_AR ar CACHE FILEPATH "" FORCE)
# Skip the platform compiler checks for cross compiling # Skip the platform compiler checks for cross compiling
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册