Unverified commit 076f8331, authored by Tao Luo, committed by GitHub

add config.SetMkldnnCacheCapacity api for mkldnn cache clear strategy (#18580)

* add config.SetMkldnnCacheCapacity api for mkldnn cache clear strategy

test=develop

* enhance MkldnnPostReset

test=develop

* add comments for mkldnn_cache_capacity field

test=develop
Parent: a20b2b43
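The new setting plugs into the existing `AnalysisConfig` flow. A minimal usage sketch, assuming an in-tree include path and a placeholder model directory (the capacity value 10 is likewise just an example, not part of this commit):

```cpp
#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./model_dir");  // placeholder path, not part of this commit
  config.EnableMKLDNN();
  // Keep cached MKLDNN primitives for at most 10 input shapes; the default
  // of 0 leaves the cache-clearing strategy disabled.
  config.SetMkldnnCacheCapacity(10);

  auto predictor = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);
  // Fill inputs and call predictor->Run(...) as usual.
  return 0;
}
```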
@@ -148,6 +148,8 @@ struct Argument {
   // Pass a set of op types to enable its mkldnn kernel
   DECL_ARGUMENT_FIELD(mkldnn_enabled_op_types, MKLDNNEnabledOpTypes,
                       std::unordered_set<std::string>);
+  // The cache capacity of different input shapes for mkldnn.
+  DECL_ARGUMENT_FIELD(mkldnn_cache_capacity, MkldnnCacheCapacity, int);
 #ifdef PADDLE_WITH_MKLDNN
   // A set of op types to enable their quantized kernels
...
@@ -115,6 +115,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   // MKLDNN related.
   CP_MEMBER(use_mkldnn_);
   CP_MEMBER(mkldnn_enabled_op_types_);
+  CP_MEMBER(mkldnn_cache_capacity_);
   // Quantization related.
   CP_MEMBER(use_mkldnn_quantizer_);
   CP_MEMBER(mkldnn_quantizer_config_);
@@ -162,6 +163,15 @@ void AnalysisConfig::EnableMKLDNN() {
   Update();
 }
+void AnalysisConfig::SetMkldnnCacheCapacity(int capacity) {
+#ifdef PADDLE_WITH_MKLDNN
+  mkldnn_cache_capacity_ = capacity;
+#else
+  LOG(ERROR) << "Please compile with MKLDNN first to set the MKLDNN cache capacity";
+  mkldnn_cache_capacity_ = 0;
+#endif
+}
 void AnalysisConfig::EnableMkldnnQuantizer() {
 #ifdef PADDLE_WITH_MKLDNN
   if (!mkldnn_quantizer_config_)
@@ -343,6 +353,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << use_ngraph_;
   ss << use_mkldnn_;
+  ss << mkldnn_cache_capacity_;
   for (auto &item : mkldnn_enabled_op_types_) ss << item;
   ss << ";";
...
@@ -185,10 +185,49 @@ bool AnalysisPredictor::PrepareExecutor() {
   return true;
 }
+void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
+#ifdef PADDLE_WITH_MKLDNN
+  VLOG(2) << "AnalysisPredictor::Run get_cur_mkldnn_session_id="
+          << platform::get_cur_mkldnn_session_id();
+  // In cache clearing mode.
+  if (config_.mkldnn_cache_capacity_ > 0) {
+    VLOG(2) << "In mkldnn cache clear mode.";
+    platform::set_cur_mkldnn_session_id(
+        platform::kMKLDNNSessionID_CacheClearing);
+    platform::set_cur_input_shape_cache_capacity(
+        config_.mkldnn_cache_capacity_);
+    // Set current_input_shape for caching dynamic shape.
+    std::stringstream ss;
+    for (size_t i = 0; i < inputs.size(); ++i) {
+      for (size_t j = 0; j < inputs[i].shape.size(); ++j) {
+        ss << inputs[i].shape[j] << "-";
+      }
+    }
+    VLOG(2) << "Set input shape=" << ss.str();
+    platform::set_cur_input_shape_str(ss.str());
+  }
+#endif
+}
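The cache key built by `MkldnnPreSet` is simply every dimension of every input, concatenated with `-` separators, so two batches with different shapes map to different cache entries. A standalone sketch of the same key construction (the helper name and example shape are ours, not part of the commit):

```cpp
#include <sstream>
#include <string>
#include <vector>

// Mirrors the key built in MkldnnPreSet: all dims of all inputs,
// each followed by '-'.
std::string ShapeKey(const std::vector<std::vector<int>> &shapes) {
  std::stringstream ss;
  for (const auto &shape : shapes) {
    for (int d : shape) ss << d << "-";
  }
  return ss.str();
}

// ShapeKey({{1, 3, 224, 224}}) == "1-3-224-224-"
```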
+void AnalysisPredictor::MkldnnPostReset() {
+#ifdef PADDLE_WITH_MKLDNN
+  // In cache clearing mode.
+  if (config_.mkldnn_cache_capacity_ > 0) {
+    paddle::platform::set_cur_mkldnn_session_id(
+        platform::kMKLDNNSessionID_Default);
+    platform::set_cur_input_shape_cache_capacity(0);
+    platform::set_cur_input_shape_str("");
+  }
+#endif
+}
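Note that `Run()` must call `MkldnnPostReset()` on every exit path for the thread-local session state to be restored. A scope guard is one way to make that pairing automatic; a hedged sketch of that alternative (not what this commit does, and it assumes access to the two private helpers, e.g. via a nested class or friendship):

```cpp
// Hypothetical RAII guard pairing MkldnnPreSet with MkldnnPostReset, so
// the session id, capacity, and shape string are reset even if Run()
// returns early.
class MkldnnRunGuard {
 public:
  MkldnnRunGuard(AnalysisPredictor *predictor,
                 const std::vector<PaddleTensor> &inputs)
      : predictor_(predictor) {
    predictor_->MkldnnPreSet(inputs);
  }
  ~MkldnnRunGuard() { predictor_->MkldnnPostReset(); }

 private:
  AnalysisPredictor *predictor_;
};
```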
 bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
                             std::vector<PaddleTensor> *output_data,
                             int batch_size) {
   paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
+#ifdef PADDLE_WITH_MKLDNN
+  if (config_.use_mkldnn_) MkldnnPreSet(inputs);
+#endif
   VLOG(3) << "Predictor::predict";
   inference::Timer timer;
   timer.tic();
@@ -230,7 +269,9 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
   // recover the cpu_math_library_num_threads to 1, in order to avoid thread
   // conflict when integrating it into deployment service.
   paddle::platform::SetNumThreads(1);
+#ifdef PADDLE_WITH_MKLDNN
+  if (config_.use_mkldnn_) MkldnnPostReset();
+#endif
   return true;
 }
@@ -595,7 +636,6 @@ bool AnalysisPredictor::ZeroCopyRun() {
   // recover the cpu_math_library_num_threads to 1, in order to avoid thread
   // conflict when integrating it into deployment service.
   paddle::platform::SetNumThreads(1);
   return true;
 }
...
@@ -109,6 +109,11 @@ class AnalysisPredictor : public PaddlePredictor {
   template <typename T>
   void GetFetchOne(const framework::LoDTensor &fetchs,
                    PaddleTensor *output_data);
+  // PreSet and PostReset for Mkldnn multi-thread and dynamic shape input.
+  // Used in AnalysisPredictor::Run(); AnalysisPredictor::ZeroCopyRun() is
+  // not supported yet.
+  void MkldnnPreSet(const std::vector<PaddleTensor> &inputs);
+  void MkldnnPostReset();
 #if PADDLE_WITH_TENSORRT
   // When we use Paddle-TRT INT8 engine, we need to generate calibration table
...
@@ -184,6 +184,10 @@ struct AnalysisConfig {
   /** Turn on MKLDNN.
    */
   void EnableMKLDNN();
+  /** Set the cache capacity of different input shapes for MKLDNN.
+   *  Default 0 means not caching any shape.
+   */
+  void SetMkldnnCacheCapacity(int capacity);
   /** A boolean state telling whether to use the MKLDNN.
    */
   bool mkldnn_enabled() const { return use_mkldnn_; }
@@ -316,8 +320,11 @@ struct AnalysisConfig {
   std::vector<std::string> anakin_passes_filter_;
   std::vector<std::string> anakin_ops_filter_;
+  // mkldnn related.
+  int mkldnn_cache_capacity_{0};
   bool use_mkldnn_quantizer_{false};
   std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;
   // If the config is already used on a predictor, it becomes invalid.
   // Any config can only be used with one predictor.
   // Variables held by config can take up a lot of memory in some cases.
...
@@ -173,12 +173,47 @@ TEST(Analyzer_MM_DNN, compare_determine) {
 }
 #ifdef PADDLE_WITH_MKLDNN
-void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) {
+void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity,
+                          std::vector<std::vector<PaddleTensor>> *outputs) {
   AnalysisConfig config;
   SetConfig(&config);
   config.EnableMKLDNN();
-  // TODO(luotao): explicit following settings will be deprecated after enhance
-  // config.EnableMKLDNN() interface.
+  config.SetMkldnnCacheCapacity(mkldnn_input_shape_cache_capacity);
+  std::vector<PaddleTensor> input;
+  auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);
+  int sample_num = 10;
+  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
+  outputs->resize(sample_num);
+  for (int i = 0; i < sample_num; i++) {
+    PrepareInputs(&input, &data, FLAGS_batch_size);
+    predictor->Run(input, &(*outputs)[i], 1);
+  }
+}
+
+TEST(Analyzer_MM_DNN, mkldnn_cache_clear) {
+  std::vector<std::vector<PaddleTensor>> outputs, cache_outputs;
+  // 0 means do not use cache clear strategy.
+  TestMkldnnCacheClear(0, &outputs);
+  // 4 means use cache clear strategy, and the
+  // mkldnn_input_shape_cache_capacity is 4.
+  TestMkldnnCacheClear(4, &cache_outputs);
+  // compare the result.
+  for (size_t i = 0; i < outputs.size(); i++) {
+    CompareResult(outputs[i], cache_outputs[i]);
+  }
+}
+void TestMkldnnShapeBlobSize(int mkldnn_input_shape_cache_capacity) {
+  AnalysisConfig config;
+  SetConfig(&config);
+  config.EnableMKLDNN();
+  config.SwitchUseFeedFetchOps(false);
+  // Since AnalysisPredictor::Run() resets cur_mkldnn_session_id to the
+  // default before it finishes, we use AnalysisPredictor::ZeroCopyRun()
+  // here to check the mkldnn_shape_blob_size.
   if (mkldnn_input_shape_cache_capacity > 0) {
     platform::set_cur_mkldnn_session_id(
         platform::kMKLDNNSessionID_CacheClearing);
@@ -186,7 +221,7 @@ void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) {
         mkldnn_input_shape_cache_capacity);
   }
-  std::vector<PaddleTensor> input, output;
+  std::vector<PaddleTensor> input;
   auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);
   int sample_num = 10;
@@ -195,8 +230,12 @@ void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) {
   auto &pool = platform::DeviceContextPool::Instance();
   auto *dev_ctx = dynamic_cast<platform::MKLDNNDeviceContext *>(
       pool.Get(platform::CPUPlace()));
+  // clear before test
+  dev_ctx->ResetBlobMap();
   for (int i = 0; i < sample_num; i++) {
     PrepareInputs(&input, &data, FLAGS_batch_size);
+    ConvertPaddleTensorToZeroCopyTensor(predictor.get(), input);
     if (mkldnn_input_shape_cache_capacity > 0) {
       std::stringstream ss;
       for (size_t i = 0; i < input.size(); i++) {
@@ -204,11 +243,9 @@ void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) {
           ss << input[i].shape[j] << "-";
         }
       }
-      // TODO(luotao): explicit following settings will be deprecated after
-      // enhance config.EnableMKLDNN() interface.
       platform::set_cur_input_shape_str(ss.str());
     }
-    predictor->Run(input, &output, 1);
+    predictor->ZeroCopyRun();
   }
   if (mkldnn_input_shape_cache_capacity > 0) {
     PADDLE_ENFORCE_EQ(dev_ctx->GetShapeBlobSize(),
@@ -216,15 +253,14 @@ void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) {
   } else {
     PADDLE_ENFORCE_EQ(dev_ctx->GetShapeBlobSize(), 1UL);
   }
-  dev_ctx->ResetBlobMap();
 }
-TEST(Analyzer_MM_DNN, mkldnn_cache_clear) {
+TEST(Analyzer_MM_DNN, mkldnn_shape_blob_size) {
   // 0 means do not use cache clear strategy.
-  TestMkldnnCacheClear(0);
+  TestMkldnnShapeBlobSize(0);
   // 4 means use cache clear strategy, and the
   // mkldnn_input_shape_cache_capacity is 4.
-  TestMkldnnCacheClear(4);
+  TestMkldnnShapeBlobSize(4);
 }
 #endif
...
@@ -462,7 +462,8 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
   if (key_it == sBlob->end()) {
     // In cache clearing mode, cur_input_shape_cache_capacity defines
     // max pblob capacity
-    if ((sid == kMKLDNNSessionID_CacheClearing) &&
+    if ((static_cast<size_t>(sid) == kMKLDNNSessionID_CacheClearing) &&
+        sBlob->size() &&
         (sBlob->size() >=
          static_cast<size_t>(cur_input_shape_cache_capacity))) {
       VLOG(2) << "sid=" << sid
...
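Taken together, the added `sBlob->size() &&` guard keeps the capacity check from firing on an empty map, and the `static_cast<size_t>` silences a signed/unsigned comparison. A self-contained sketch of the bounded-cache insert this hunk implements (container types and the choice of evicting the map's first entry are illustrative; the actual eviction choice is outside the visible diff):

```cpp
#include <map>
#include <memory>
#include <string>

// Illustrative stand-in for the per-session shape-blob cache: one entry
// per input-shape string, each holding that shape's cached primitives.
using ShapeBlobMap = std::map<std::string, std::shared_ptr<int>>;

void SetBlobBounded(ShapeBlobMap *shape_blobs, const std::string &shape_key,
                    std::shared_ptr<int> blob, size_t capacity) {
  if (shape_blobs->find(shape_key) == shape_blobs->end()) {
    // A new shape arrives while the cache is at capacity: evict one cached
    // shape first. The !empty() check mirrors the added `sBlob->size() &&`
    // guard, which prevents evicting from an empty map when capacity is 0.
    if (!shape_blobs->empty() && shape_blobs->size() >= capacity) {
      shape_blobs->erase(shape_blobs->begin());  // simplest possible policy
    }
  }
  (*shape_blobs)[shape_key] = std::move(blob);
}
```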