未验证 提交 53a5906c 编写于 作者: W Wilber 提交者: GitHub

fix fluid-lite-subgraph x86 compile error test=develop (#2682)

-fix fluid-lite-subgraph x86 compile error
    - Replace FLAGS with environment variables
上级 52f325e3
...@@ -32,26 +32,37 @@ ...@@ -32,26 +32,37 @@
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include <algorithm> #include <algorithm>
DEFINE_double(fraction_of_cpu_memory_to_use, #include "lite/utils/env.h"
1,
"Default use 100% of CPU memory for PaddlePaddle," // DEFINE_double(fraction_of_cpu_memory_to_use,
"reserve the rest for page tables, etc"); // 1,
DEFINE_uint64(initial_cpu_memory_in_mb, // "Default use 100% of CPU memory for PaddlePaddle,"
500ul, // "reserve the rest for page tables, etc");
"Initial CPU memory for PaddlePaddle, in MD unit."); double fraction_of_cpu_memory_to_use =
paddle::lite::GetDoubleFromEnv("fraction_of_cpu_memory_to_use", 1);
DEFINE_double(
fraction_of_cuda_pinned_memory_to_use, // DEFINE_uint64(initial_cpu_memory_in_mb,
0.5, // 500ul,
"Default use 50% of CPU memory as the pinned_memory for PaddlePaddle," // "Initial CPU memory for PaddlePaddle, in MD unit.");
"reserve the rest for page tables, etc"); uint64_t initial_cpu_memory_in_mb =
paddle::lite::GetUInt64FromEnv("initial_cpu_memory_in_mb", 500ul);
// DEFINE_double(
// fraction_of_cuda_pinned_memory_to_use,
// 0.5,
// "Default use 50% of CPU memory as the pinned_memory for PaddlePaddle,"
// "reserve the rest for page tables, etc");
double fraction_of_cuda_pinned_memory_to_use = paddle::lite::GetDoubleFromEnv(
"fraction_of_cuda_pinned_memory_to_use", 0.5);
// If use_pinned_memory is true, CPUAllocator calls mlock, which // If use_pinned_memory is true, CPUAllocator calls mlock, which
// returns pinned and locked memory as staging areas for data exchange // returns pinned and locked memory as staging areas for data exchange
// between host and device. Allocates too much would reduce the amount // between host and device. Allocates too much would reduce the amount
// of memory available to the system for paging. So, by default, we // of memory available to the system for paging. So, by default, we
// should set false to use_pinned_memory. // should set false to use_pinned_memory.
DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory."); // DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory.");
bool use_pinned_memory =
paddle::lite::GetBoolFromEnv("use_pinned_memory", true);
namespace paddle { namespace paddle {
namespace lite { namespace lite {
...@@ -81,7 +92,7 @@ size_t CpuTotalPhysicalMemory() { ...@@ -81,7 +92,7 @@ size_t CpuTotalPhysicalMemory() {
size_t CpuMaxAllocSize() { size_t CpuMaxAllocSize() {
// For distributed systems, it requires configuring and limiting // For distributed systems, it requires configuring and limiting
// the fraction of memory to use. // the fraction of memory to use.
return FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory(); return fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory();
} }
size_t CpuMinChunkSize() { size_t CpuMinChunkSize() {
...@@ -92,15 +103,14 @@ size_t CpuMinChunkSize() { ...@@ -92,15 +103,14 @@ size_t CpuMinChunkSize() {
size_t CpuMaxChunkSize() { size_t CpuMaxChunkSize() {
// Allow to allocate the maximum chunk size is roughly 3% of CPU memory, // Allow to allocate the maximum chunk size is roughly 3% of CPU memory,
// or the initial_cpu_memory_in_mb. // or the initial_cpu_memory_in_mb.
return std::min( return std::min(static_cast<size_t>(CpuMaxAllocSize() / 32),
static_cast<size_t>(CpuMaxAllocSize() / 32), static_cast<size_t>(initial_cpu_memory_in_mb * 1 << 20));
static_cast<size_t>(FLAGS_initial_cpu_memory_in_mb * 1 << 20));
} }
size_t CUDAPinnedMaxAllocSize() { size_t CUDAPinnedMaxAllocSize() {
// For distributed systems, it requires configuring and limiting // For distributed systems, it requires configuring and limiting
// the fraction of memory to use. // the fraction of memory to use.
return FLAGS_fraction_of_cuda_pinned_memory_to_use * CpuTotalPhysicalMemory(); return fraction_of_cuda_pinned_memory_to_use * CpuTotalPhysicalMemory();
} }
size_t CUDAPinnedMinChunkSize() { size_t CUDAPinnedMinChunkSize() {
......
...@@ -22,36 +22,46 @@ limitations under the License. */ ...@@ -22,36 +22,46 @@ limitations under the License. */
#include "lite/backends/x86/cupti_lib_path.h" #include "lite/backends/x86/cupti_lib_path.h"
#include "lite/backends/x86/port.h" #include "lite/backends/x86/port.h"
#include "lite/backends/x86/warpctc_lib_path.h" #include "lite/backends/x86/warpctc_lib_path.h"
#include "lite/utils/env.h"
#include "lite/utils/paddle_enforce.h" #include "lite/utils/paddle_enforce.h"
DEFINE_string(cudnn_dir, // DEFINE_string(cudnn_dir,
"", // "",
"Specify path for loading libcudnn.so. For instance, " // "Specify path for loading libcudnn.so. For instance, "
"/usr/local/cudnn/lib. If empty [default], dlopen " // "/usr/local/cudnn/lib. If empty [default], dlopen "
"will search cudnn from LD_LIBRARY_PATH"); // "will search cudnn from LD_LIBRARY_PATH");
std::string cudnn_dir = paddle::lite::GetStringFromEnv("cudnn_dir"); // NOLINT
DEFINE_string(cuda_dir, // DEFINE_string(cuda_dir,
"", // "",
"Specify path for loading cuda library, such as libcublas, " // "Specify path for loading cuda library, such as libcublas, "
"libcurand. For instance, /usr/local/cuda/lib64. If default, " // "libcurand. For instance, /usr/local/cuda/lib64. If default, "
"dlopen will search cuda from LD_LIBRARY_PATH"); // "dlopen will search cuda from LD_LIBRARY_PATH");
std::string cuda_dir = paddle::lite::GetStringFromEnv("cuda_dir"); // NOLINT
DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so."); // DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so.");
std::string f_warpctc_dir = // NOLINT
paddle::lite::GetStringFromEnv("warpctc_dir"); // NOLINT
DEFINE_string(nccl_dir, // DEFINE_string(nccl_dir,
"", // "",
"Specify path for loading nccl library, such as libcublas, " // "Specify path for loading nccl library, such as libcublas, "
"libcurand. For instance, /usr/local/cuda/lib64. If default, " // "libcurand. For instance, /usr/local/cuda/lib64. If default, "
"dlopen will search cuda from LD_LIBRARY_PATH"); // "dlopen will search cuda from LD_LIBRARY_PATH");
std::string nccl_dir = paddle::lite::GetStringFromEnv("nccl_dir"); // NOLINT
DEFINE_string(cupti_dir, "", "Specify path for loading cupti.so."); // DEFINE_string(cupti_dir, "", "Specify path for loading cupti.so.");
std::string cupti_dir = paddle::lite::GetStringFromEnv("cupti_dir"); // NOLINT
DEFINE_string( // DEFINE_string(
tensorrt_dir, // tensorrt_dir,
"", // "",
"Specify path for loading tensorrt library, such as libnvinfer.so."); // "Specify path for loading tensorrt library, such as libnvinfer.so.");
std::string tensorrt_dir = // NOLINT
paddle::lite::GetStringFromEnv("tensorrt_dir"); // NOLINT
DEFINE_string(mklml_dir, "", "Specify path for loading libmklml_intel.so."); // DEFINE_string(mklml_dir, "", "Specify path for loading libmklml_intel.so.");
std::string mklml_dir = paddle::lite::GetStringFromEnv("mklml_dir"); // NOLINT
namespace paddle { namespace paddle {
namespace lite { namespace lite {
...@@ -180,28 +190,28 @@ auto error_msg = ...@@ -180,28 +190,28 @@ auto error_msg =
void* GetCublasDsoHandle() { void* GetCublasDsoHandle() {
#if defined(__APPLE__) || defined(__OSX__) #if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.dylib"); return GetDsoHandleFromSearchPath(cuda_dir, "libcublas.dylib");
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA) #elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, win_cublas_lib); return GetDsoHandleFromSearchPath(cuda_dir, win_cublas_lib);
#else #else
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.so"); return GetDsoHandleFromSearchPath(cuda_dir, "libcublas.so");
#endif #endif
} }
void* GetCUDNNDsoHandle() { void* GetCUDNNDsoHandle() {
#if defined(__APPLE__) || defined(__OSX__) #if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", false); return GetDsoHandleFromSearchPath(cudnn_dir, "libcudnn.dylib", false);
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA) #elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, win_cudnn_lib); return GetDsoHandleFromSearchPath(cudnn_dir, win_cudnn_lib);
#else #else
return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.so", false); return GetDsoHandleFromSearchPath(cudnn_dir, "libcudnn.so", false);
#endif #endif
} }
void* GetCUPTIDsoHandle() { void* GetCUPTIDsoHandle() {
std::string cupti_path = cupti_lib_path; std::string cupti_path = cupti_lib_path;
if (!FLAGS_cupti_dir.empty()) { if (!cupti_dir.empty()) {
cupti_path = FLAGS_cupti_dir; cupti_path = cupti_dir;
} }
#if defined(__APPLE__) || defined(__OSX__) #if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(cupti_path, "libcupti.dylib", false); return GetDsoHandleFromSearchPath(cupti_path, "libcupti.dylib", false);
...@@ -212,18 +222,18 @@ void* GetCUPTIDsoHandle() { ...@@ -212,18 +222,18 @@ void* GetCUPTIDsoHandle() {
void* GetCurandDsoHandle() { void* GetCurandDsoHandle() {
#if defined(__APPLE__) || defined(__OSX__) #if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.dylib"); return GetDsoHandleFromSearchPath(cuda_dir, "libcurand.dylib");
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA) #elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, win_curand_lib); return GetDsoHandleFromSearchPath(cuda_dir, win_curand_lib);
#else #else
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.so"); return GetDsoHandleFromSearchPath(cuda_dir, "libcurand.so");
#endif #endif
} }
void* GetWarpCTCDsoHandle() { void* GetWarpCTCDsoHandle() {
std::string warpctc_dir = warpctc_lib_path; std::string warpctc_dir = warpctc_lib_path;
if (!FLAGS_warpctc_dir.empty()) { if (!f_warpctc_dir.empty()) {
warpctc_dir = FLAGS_warpctc_dir; warpctc_dir = f_warpctc_dir;
} }
#if defined(__APPLE__) || defined(__OSX__) #if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(warpctc_dir, "libwarpctc.dylib"); return GetDsoHandleFromSearchPath(warpctc_dir, "libwarpctc.dylib");
...@@ -236,27 +246,27 @@ void* GetWarpCTCDsoHandle() { ...@@ -236,27 +246,27 @@ void* GetWarpCTCDsoHandle() {
void* GetNCCLDsoHandle() { void* GetNCCLDsoHandle() {
#if defined(__APPLE__) || defined(__OSX__) #if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.dylib"); return GetDsoHandleFromSearchPath(nccl_dir, "libnccl.dylib");
#else #else
return GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.so"); return GetDsoHandleFromSearchPath(nccl_dir, "libnccl.so");
#endif #endif
} }
void* GetTensorRtDsoHandle() { void* GetTensorRtDsoHandle() {
#if defined(__APPLE__) || defined(__OSX__) #if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_tensorrt_dir, "libnvinfer.dylib"); return GetDsoHandleFromSearchPath(tensorrt_dir, "libnvinfer.dylib");
#else #else
return GetDsoHandleFromSearchPath(FLAGS_tensorrt_dir, "libnvinfer.so"); return GetDsoHandleFromSearchPath(tensorrt_dir, "libnvinfer.so");
#endif #endif
} }
void* GetMKLMLDsoHandle() { void* GetMKLMLDsoHandle() {
#if defined(__APPLE__) || defined(__OSX__) #if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_mklml_dir, "libmklml_intel.dylib"); return GetDsoHandleFromSearchPath(mklml_dir, "libmklml_intel.dylib");
#elif defined(_WIN32) #elif defined(_WIN32)
return GetDsoHandleFromSearchPath(FLAGS_mklml_dir, "mklml.dll"); return GetDsoHandleFromSearchPath(mklml_dir, "mklml.dll");
#else #else
return GetDsoHandleFromSearchPath(FLAGS_mklml_dir, "libmklml_intel.so"); return GetDsoHandleFromSearchPath(mklml_dir, "libmklml_intel.so");
#endif #endif
} }
......
...@@ -21,13 +21,15 @@ ...@@ -21,13 +21,15 @@
// posix_memalign // posix_memalign
#include "lite/backends/x86/cpu_info.h" #include "lite/backends/x86/cpu_info.h"
#include "lite/backends/x86/jit/macro.h" #include "lite/backends/x86/jit/macro.h"
#include "lite/utils/env.h"
#include "lite/utils/paddle_enforce.h" #include "lite/utils/paddle_enforce.h"
#ifndef _WIN32 #ifndef _WIN32
#define posix_memalign_free free #define posix_memalign_free free
#endif #endif
DEFINE_bool(dump_jitcode, false, "Whether to dump the jitcode to file"); // DEFINE_bool(dump_jitcode, false, "Whether to dump the jitcode to file");
bool dump_jitcode = paddle::lite::GetBoolFromEnv("dump_jitcode");
namespace paddle { namespace paddle {
namespace lite { namespace lite {
......
...@@ -20,7 +20,8 @@ ...@@ -20,7 +20,8 @@
#include <vector> #include <vector>
#include "lite/backends/x86/jit/kernel_base.h" #include "lite/backends/x86/jit/kernel_base.h"
DECLARE_bool(dump_jitcode); // DECLARE_bool(dump_jitcode);
extern bool dump_jitcode;
namespace paddle { namespace paddle {
namespace lite { namespace lite {
...@@ -36,7 +37,7 @@ class GenBase : public Kernel { ...@@ -36,7 +37,7 @@ class GenBase : public Kernel {
template <typename Func> template <typename Func>
Func getCode() const { Func getCode() const {
const unsigned char* code = this->getCodeInternal(); const unsigned char* code = this->getCodeInternal();
if (FLAGS_dump_jitcode) { if (dump_jitcode) {
this->dumpCode(code); this->dumpCode(code);
} }
// Note: failed to cast with reinterpret_cast<const Func> on Mac clang, // Note: failed to cast with reinterpret_cast<const Func> on Mac clang,
......
...@@ -13,10 +13,13 @@ ...@@ -13,10 +13,13 @@
// limitations under the License. // limitations under the License.
#include "lite/kernels/x86/gru_compute.h" #include "lite/kernels/x86/gru_compute.h"
#include "lite/utils/env.h"
DEFINE_int32(paddle_num_threads, // DEFINE_int32(paddle_num_threads,
1, // 1,
"Number of threads for each paddle instance."); // "Number of threads for each paddle instance.");
int32_t paddle_num_threads =
paddle::lite::GetIntFromEnv("paddle_num_threads", 1);
REGISTER_LITE_KERNEL(gru, REGISTER_LITE_KERNEL(gru,
kX86, kX86,
......
...@@ -26,7 +26,8 @@ ...@@ -26,7 +26,8 @@
#include "lite/core/types.h" #include "lite/core/types.h"
#include "lite/fluid/eigen.h" #include "lite/fluid/eigen.h"
DECLARE_int32(paddle_num_threads); // DECLARE_int32(paddle_num_threads);
extern int32_t paddle_num_threads;
namespace paddle { namespace paddle {
namespace lite { namespace lite {
...@@ -109,7 +110,7 @@ class GRUCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -109,7 +110,7 @@ class GRUCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
// use MKL packed to speedup GEMM // use MKL packed to speedup GEMM
if (FLAGS_paddle_num_threads >= 4) { if (paddle_num_threads >= 4) {
auto blas = lite::x86::math::GetBlas<TARGET(kX86), T>(context); auto blas = lite::x86::math::GetBlas<TARGET(kX86), T>(context);
T* packed_gate = blas.GEMM_ALLOC(CblasBMatrix, T* packed_gate = blas.GEMM_ALLOC(CblasBMatrix,
1 /*height of C*/, 1 /*height of C*/,
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>
#include <string>
namespace paddle {
namespace lite {
// Returns the value of the environment variable named `str` as a string.
// When the variable is not set, `def` is returned instead. A variable that
// is set to the empty string yields an empty string, not `def`.
static std::string GetStringFromEnv(const std::string& str,
                                    const std::string& def = "") {
  const char* raw = std::getenv(str.c_str());
  return raw != nullptr ? std::string(raw) : def;
}
// Interprets the environment variable named `str` as a boolean switch.
//   - variable not set          -> `def`
//   - exactly "false" or "0"    -> false
//   - anything else             -> true (this includes the empty string and
//                                  differently-cased spellings like "FALSE")
static bool GetBoolFromEnv(const std::string& str, bool def = false) {
  const char* raw = std::getenv(str.c_str());
  if (raw == nullptr) {
    return def;
  }
  const bool spelled_false = std::strcmp(raw, "false") == 0;
  const bool spelled_zero = std::strcmp(raw, "0") == 0;
  return !(spelled_false || spelled_zero);
}
// Parses the environment variable named `str` as a base-10 integer.
//
// Returns `def` when the variable is not set. Leading whitespace and an
// optional sign are accepted; parsing stops at the first non-digit, and a
// value with no leading digits yields 0.
//
// Values that do not fit in `int` saturate to INT_MAX / INT_MIN. std::atoi
// has undefined behaviour for out-of-range input, so the parse goes through
// std::strtol (which itself saturates to the `long` range) and the result is
// clamped before narrowing.
static int GetIntFromEnv(const std::string& str, int def = 0) {
  const char* raw = std::getenv(str.c_str());
  if (raw == nullptr) {
    return def;
  }
  const long parsed = std::strtol(raw, nullptr, 10);  // NOLINT(runtime/int)
  if (parsed > std::numeric_limits<int>::max()) {
    return std::numeric_limits<int>::max();
  }
  if (parsed < std::numeric_limits<int>::min()) {
    return std::numeric_limits<int>::min();
  }
  return static_cast<int>(parsed);
}
// Parses the environment variable named `str` as a floating-point number
// using atof. Returns `def` when the variable is not set; text that does not
// start with a number parses as 0.0.
static double GetDoubleFromEnv(const std::string& str, double def = 0.0) {
  if (const char* raw = std::getenv(str.c_str())) {
    return std::atof(raw);
  }
  return def;
}
// Parses the environment variable named `str` as a base-10 unsigned 64-bit
// integer. Returns `def` when the variable is not set; text with no leading
// digits parses as 0.
//
// The parse uses std::strtoull rather than atol: atol produces a signed
// `long`, so it cannot represent values above LONG_MAX (and `long` is only
// 32 bits wide on LLP64 platforms), which would silently corrupt large
// settings. A leading '-' is still accepted and wraps modulo 2^64, matching
// what the previous signed-parse-then-cast produced for small negatives.
static uint64_t GetUInt64FromEnv(const std::string& str, uint64_t def = 0ul) {
  const char* raw = std::getenv(str.c_str());
  if (raw == nullptr) {
    return def;
  }
  return static_cast<uint64_t>(std::strtoull(raw, nullptr, 10));
}
} // namespace lite
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册