Unverified Commit 076f8331, authored by Tao Luo, committed by GitHub

add config.SetMkldnnCacheCapacity api for mkldnn cache clear strategy (#18580)

* add config.SetMkldnnCacheCapacity api for mkldnn cache clear strategy

test=develop

* enhance MkldnnPostReset

test=develop

* add comments for mkldnn_cache_capacity field

test=develop
Parent a20b2b43
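For context, a minimal usage sketch of the API this commit adds, assuming the standard AnalysisConfig/CreatePaddlePredictor flow; the model path and the capacity value are placeholders:

#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

// Hypothetical caller illustrating the new cache-clearing knob.
void RunWithMkldnnCacheClearing() {
  paddle::AnalysisConfig config;
  config.SetModel("/path/to/model");  // placeholder path
  config.EnableMKLDNN();
  // Keep MKLDNN blobs for at most 10 distinct input shapes; the default
  // of 0 leaves the cache-clearing strategy disabled.
  config.SetMkldnnCacheCapacity(10);
  auto predictor = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);
  std::vector<paddle::PaddleTensor> inputs, outputs;
  // ... fill inputs (shapes may vary between calls), then:
  predictor->Run(inputs, &outputs, /*batch_size=*/1);
}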
@@ -148,6 +148,8 @@ struct Argument {
// Pass a set of op types to enable its mkldnn kernel
DECL_ARGUMENT_FIELD(mkldnn_enabled_op_types, MKLDNNEnabledOpTypes,
std::unordered_set<std::string>);
// The cache capacity of different input shapes for mkldnn.
DECL_ARGUMENT_FIELD(mkldnn_cache_capacity, MkldnnCacheCapacity, int);
#ifdef PADDLE_WITH_MKLDNN
// A set of op types to enable their quantized kernels
......
@@ -115,6 +115,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// MKLDNN related.
CP_MEMBER(use_mkldnn_);
CP_MEMBER(mkldnn_enabled_op_types_);
CP_MEMBER(mkldnn_cache_capacity_);
// Quantization related.
CP_MEMBER(use_mkldnn_quantizer_);
CP_MEMBER(mkldnn_quantizer_config_);
@@ -162,6 +163,15 @@ void AnalysisConfig::EnableMKLDNN() {
Update();
}
void AnalysisConfig::SetMkldnnCacheCapacity(int capacity) {
#ifdef PADDLE_WITH_MKLDNN
mkldnn_cache_capacity_ = capacity;
#else
LOG(ERROR) << "Please compile with MKLDNN first to set MKLDNN Thread Id";
mkldnn_cache_capacity_ = 0;
#endif
}
void AnalysisConfig::EnableMkldnnQuantizer() {
#ifdef PADDLE_WITH_MKLDNN
if (!mkldnn_quantizer_config_)
@@ -343,6 +353,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << use_ngraph_;
ss << use_mkldnn_;
ss << mkldnn_cache_capacity_;
for (auto &item : mkldnn_enabled_op_types_) ss << item;
ss << ";";
......
@@ -185,10 +185,49 @@ bool AnalysisPredictor::PrepareExecutor() {
return true;
}
void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
#ifdef PADDLE_WITH_MKLDNN
VLOG(2) << "AnalysisPredictor::Run get_cur_mkldnn_session_id="
<< platform::get_cur_mkldnn_session_id();
// In cache clearing mode.
if (config_.mkldnn_cache_capacity_ > 0) {
VLOG(2) << "In mkldnn cache clear mode.";
platform::set_cur_mkldnn_session_id(
platform::kMKLDNNSessionID_CacheClearing);
platform::set_cur_input_shape_cache_capacity(
config_.mkldnn_cache_capacity_);
// Set current_input_shape for caching dynamic shape.
std::stringstream ss;
for (size_t i = 0; i < inputs.size(); ++i) {
for (size_t j = 0; j < inputs[i].shape.size(); ++j) {
ss << inputs[i].shape[j] << "-";
}
}
VLOG(2) << "Set input shape=" << ss.str();
platform::set_cur_input_shape_str(ss.str());
}
#endif
}
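As an aside, the key built above is simply every dimension of every input joined with "-" separators; a self-contained sketch of the same loop (the helper name BuildShapeKey is hypothetical):

#include <sstream>
#include <string>
#include <vector>

// Hypothetical helper mirroring the key construction in MkldnnPreSet.
std::string BuildShapeKey(const std::vector<std::vector<int>> &shapes) {
  std::stringstream ss;
  for (const auto &shape : shapes) {
    for (int dim : shape) ss << dim << "-";
  }
  return ss.str();  // {{1, 3, 224, 224}, {1, 128}} -> "1-3-224-224-1-128-"
}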
void AnalysisPredictor::MkldnnPostReset() {
#ifdef PADDLE_WITH_MKLDNN
// In cache clearing mode.
if (config_.mkldnn_cache_capacity_ > 0) {
paddle::platform::set_cur_mkldnn_session_id(
platform::kMKLDNNSessionID_Default);
platform::set_cur_input_shape_cache_capacity(0);
platform::set_cur_input_shape_str("");
}
#endif
}
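A small design note: MkldnnPreSet and MkldnnPostReset must bracket every Run() call in matched pairs. A hedged sketch of an RAII guard that would make the reset automatic even on early returns; this wrapper is an illustration only, not part of the patch:

#include <functional>
#include <utility>

// Hypothetical scope guard: runs `setup` immediately and `reset` on scope
// exit, mirroring the MkldnnPreSet/MkldnnPostReset pairing in Run().
class ScopedMkldnnState {
 public:
  ScopedMkldnnState(const std::function<void()> &setup,
                    std::function<void()> reset)
      : reset_(std::move(reset)) {
    setup();
  }
  ~ScopedMkldnnState() { reset_(); }

 private:
  std::function<void()> reset_;
};

// Usage inside Run() could look like:
//   ScopedMkldnnState guard([&] { MkldnnPreSet(inputs); },
//                           [&] { MkldnnPostReset(); });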
bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size) {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) MkldnnPreSet(inputs);
#endif
VLOG(3) << "Predictor::predict";
inference::Timer timer;
timer.tic();
@@ -230,7 +269,9 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
// Recover cpu_math_library_num_threads to 1 to avoid thread conflicts when
// integrating into a deployment service.
paddle::platform::SetNumThreads(1);
#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) MkldnnPostReset();
#endif
return true;
}
@@ -595,7 +636,6 @@ bool AnalysisPredictor::ZeroCopyRun() {
// Recover cpu_math_library_num_threads to 1 to avoid thread conflicts when
// integrating into a deployment service.
paddle::platform::SetNumThreads(1);
return true;
}
......
@@ -109,6 +109,11 @@ class AnalysisPredictor : public PaddlePredictor {
template <typename T>
void GetFetchOne(const framework::LoDTensor &fetchs,
PaddleTensor *output_data);
// PreSet and PostReset for MKLDNN multi-thread and dynamic-shape input.
// Used in AnalysisPredictor::Run(); not supported in
// AnalysisPredictor::ZeroCopyRun() yet.
void MkldnnPreSet(const std::vector<PaddleTensor> &inputs);
void MkldnnPostReset();
#if PADDLE_WITH_TENSORRT
// When we use Paddle-TRT INT8 engine, we need to generate calibration table
......
@@ -184,6 +184,10 @@ struct AnalysisConfig {
/** Turn on MKLDNN.
*/
void EnableMKLDNN();
/** Set the cache capacity of different input shapes for MKLDNN.
* Default 0 means do not cache any shape.
*/
void SetMkldnnCacheCapacity(int capacity);
/** A boolean state telling whether to use the MKLDNN.
*/
bool mkldnn_enabled() const { return use_mkldnn_; }
@@ -316,8 +320,11 @@ struct AnalysisConfig {
std::vector<std::string> anakin_passes_filter_;
std::vector<std::string> anakin_ops_filter_;
// mkldnn related.
int mkldnn_cache_capacity_{0};
bool use_mkldnn_quantizer_{false};
std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;
// If the config is already used on a predictor, it becomes invalid.
// Any config can only be used with one predictor.
// Variables held by config can take up a lot of memory in some cases.
......
@@ -173,12 +173,47 @@ TEST(Analyzer_MM_DNN, compare_determine) {
}
#ifdef PADDLE_WITH_MKLDNN
- void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) {
+ void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity,
+                           std::vector<std::vector<PaddleTensor>> *outputs) {
AnalysisConfig config;
SetConfig(&config);
config.EnableMKLDNN();
// TODO(luotao): the following explicit settings will be deprecated after the
// config.EnableMKLDNN() interface is enhanced.
config.SetMkldnnCacheCapacity(mkldnn_input_shape_cache_capacity);
std::vector<PaddleTensor> input;
auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);
int sample_num = 10;
DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
outputs->resize(sample_num);
for (int i = 0; i < sample_num; i++) {
PrepareInputs(&input, &data, FLAGS_batch_size);
predictor->Run(input, &(*outputs)[i], 1);
}
}
TEST(Analyzer_MM_DNN, mkldnn_cache_clear) {
std::vector<std::vector<PaddleTensor>> outputs, cache_outputs;
// 0 means do not use cache clear strategy.
TestMkldnnCacheClear(0, &outputs);
// 4 means use cache clear strategy, and the
// mkldnn_input_shape_cache_capacity is 4.
TestMkldnnCacheClear(4, &cache_outputs);
// compare the result.
for (size_t i = 0; i < outputs.size(); i++) {
CompareResult(outputs[i], cache_outputs[i]);
}
}
void TestMkldnnShapeBlobSize(int mkldnn_input_shape_cache_capacity) {
AnalysisConfig config;
SetConfig(&config);
config.EnableMKLDNN();
config.SwitchUseFeedFetchOps(false);
// Since AnalysisPredictor::Run() resets cur_mkldnn_session_id to the default
// before it finishes, we use AnalysisPredictor::ZeroCopyRun() here to check
// the mkldnn_shape_blob_size.
if (mkldnn_input_shape_cache_capacity > 0) {
platform::set_cur_mkldnn_session_id(
platform::kMKLDNNSessionID_CacheClearing);
@@ -186,7 +221,7 @@ void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) {
mkldnn_input_shape_cache_capacity);
}
- std::vector<PaddleTensor> input, output;
+ std::vector<PaddleTensor> input;
auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);
int sample_num = 10;
@@ -195,8 +230,12 @@ void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) {
auto &pool = platform::DeviceContextPool::Instance();
auto *dev_ctx = dynamic_cast<platform::MKLDNNDeviceContext *>(
pool.Get(platform::CPUPlace()));
// clear before test
dev_ctx->ResetBlobMap();
for (int i = 0; i < sample_num; i++) {
PrepareInputs(&input, &data, FLAGS_batch_size);
ConvertPaddleTensorToZeroCopyTensor(predictor.get(), input);
if (mkldnn_input_shape_cache_capacity > 0) {
std::stringstream ss;
for (size_t i = 0; i < input.size(); i++) {
@@ -204,11 +243,9 @@ void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) {
ss << input[i].shape[j] << "-";
}
}
// TODO(luotao): the following explicit settings will be deprecated after
// the config.EnableMKLDNN() interface is enhanced.
platform::set_cur_input_shape_str(ss.str());
}
- predictor->Run(input, &output, 1);
+ predictor->ZeroCopyRun();
}
if (mkldnn_input_shape_cache_capacity > 0) {
PADDLE_ENFORCE_EQ(dev_ctx->GetShapeBlobSize(),
@@ -216,15 +253,14 @@ void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) {
} else {
PADDLE_ENFORCE_EQ(dev_ctx->GetShapeBlobSize(), 1UL);
}
dev_ctx->ResetBlobMap();
}
- TEST(Analyzer_MM_DNN, mkldnn_cache_clear) {
+ TEST(Analyzer_MM_DNN, mkldnn_shape_blob_size) {
// 0 means do not use cache clear strategy.
- TestMkldnnCacheClear(0);
+ TestMkldnnShapeBlobSize(0);
// 4 means use cache clear strategy, and the
// mkldnn_input_shape_cache_capacity is 4.
- TestMkldnnCacheClear(4);
+ TestMkldnnShapeBlobSize(4);
}
#endif
......
@@ -462,7 +462,8 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
if (key_it == sBlob->end()) {
// In cache clearing mode, cur_input_shape_cache_capacity defines
// max pblob capacity
- if ((sid == kMKLDNNSessionID_CacheClearing) &&
+ if ((static_cast<size_t>(sid) == kMKLDNNSessionID_CacheClearing) &&
+     sBlob->size() &&
(sBlob->size() >=
static_cast<size_t>(cur_input_shape_cache_capacity))) {
VLOG(2) << "sid=" << sid
......
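For reference, a minimal sketch of the capacity check in SetBlob above, assuming the simplest policy of evicting one existing entry once the shape-keyed map is full; the actual eviction lines are elided from this diff, so the helper below is hypothetical:

#include <cstddef>
#include <map>
#include <string>
#include <utility>

// Hypothetical bounded insert; the real code keys blobs per session id
// inside MKLDNNDeviceContext, and the eviction policy here is assumed.
template <typename Blob>
void InsertShapeBlob(std::map<std::string, Blob> *cache, size_t capacity,
                     const std::string &shape_key, Blob blob) {
  if (capacity > 0 && cache->count(shape_key) == 0 &&
      cache->size() >= capacity) {
    cache->erase(cache->begin());  // evict one entry to stay within capacity
  }
  (*cache)[shape_key] = std::move(blob);
}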