Commit c6d230e0 authored by Luo Tao

add FLAGS_use_mkldnn to globally control use_mkldnn

Parent 0aa9546e
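For orientation, a minimal usage sketch of the new flag (the env-var workflow is inferred from the read_env_flags and --tryfromenv changes in this commit; the script itself is hypothetical):

```python
# Hypothetical usage sketch: enable MKLDNN globally for inference.
# FLAGS_use_mkldnn must be in the environment before paddle.fluid is imported,
# because __bootstrap__ forwards it to gflags via --tryfromenv at import time.
import os

os.environ['FLAGS_use_mkldnn'] = 'true'   # set before the import below

import paddle.fluid as fluid  # bootstrap reads FLAGS_use_mkldnn here
# From here on, Executor::Run() sees FLAGS_use_mkldnn and calls EnableMKLDNN()
# on the program, flipping the use_mkldnn attribute on every op that has it.
```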
...@@ -24,6 +24,7 @@ limitations under the License. */
#include "paddle/fluid/platform/profiler.h"
DECLARE_bool(benchmark);
DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run");
namespace paddle {
namespace framework {
...@@ -115,6 +116,7 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope,
void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
bool create_local_scope, bool create_vars) {
platform::RecordBlock b(block_id);
if (FLAGS_use_mkldnn) EnableMKLDNN(pdesc);
auto ctx = Prepare(pdesc, block_id);
RunPreparedContext(ctx.get(), scope, create_local_scope, create_vars);
}
...@@ -214,6 +216,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
const std::string& feed_holder_name,
const std::string& fetch_holder_name) {
platform::RecordBlock b(kProgramId);
if (FLAGS_use_mkldnn) EnableMKLDNN(program);
bool has_feed_ops =
has_feed_operators(program.Block(0), *feed_targets, feed_holder_name);
bool has_fetch_ops =
...@@ -225,7 +228,6 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
unique_ptr_of_copy_program.reset(new ProgramDesc(program));
copy_program = unique_ptr_of_copy_program.get();
}
auto* global_block = copy_program->MutableBlock(0);
if (!has_feed_ops) {
...@@ -378,5 +380,19 @@ void Executor::RunPreparedContext(
}
}
void Executor::EnableMKLDNN(const ProgramDesc& program) {
#ifdef PADDLE_WITH_MKLDNN
VLOG(3) << "use_mkldnn=True";
for (size_t bid = 0; bid < program.Size(); ++bid) {
auto* block = const_cast<ProgramDesc&>(program).MutableBlock(bid);
for (auto* op : block->AllOps()) {
if (op->HasAttr("use_mkldnn")) {
op->SetAttr("use_mkldnn", true);
}
}
}
#endif
}
} // namespace framework
} // namespace paddle
...@@ -81,6 +81,8 @@ class Executor {
const std::string& feed_holder_name = "feed",
const std::string& fetch_holder_name = "fetch");
void EnableMKLDNN(const ProgramDesc& program);
private:
const platform::Place place_;
};
......
...@@ -21,7 +21,6 @@ DEFINE_string(fp16_dirname, "", "Directory of the float16 inference model.");
DEFINE_int32(batch_size, 1, "Batch size of input data");
DEFINE_int32(repeat, 1, "Running the inference program repeat times");
DEFINE_bool(skip_cpu, false, "Skip the cpu test");
DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run inference");
TEST(inference, image_classification) {
if (FLAGS_dirname.empty() || FLAGS_batch_size < 1 || FLAGS_repeat < 1) {
...@@ -59,10 +58,8 @@ TEST(inference, image_classification) {
// Run inference on CPU
LOG(INFO) << "--- CPU Runs: ---";
LOG(INFO) << "Batch size is " << FLAGS_batch_size;
LOG(INFO) << "FLAGS_use_mkldnn: " << FLAGS_use_mkldnn;
TestInference<paddle::platform::CPUPlace, false, true>(
dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, is_combined,
FLAGS_use_mkldnn);
dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, is_combined);
LOG(INFO) << output1.dims();
}
......
...@@ -27,7 +27,6 @@ limitations under the License. */
DEFINE_string(model_path, "", "Directory of the inference model.");
DEFINE_string(data_file, "", "File of input index data.");
DEFINE_int32(repeat, 100, "Running the inference program repeat times");
DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run inference");
DEFINE_bool(prepare_vars, true, "Prepare variables before executor");
DEFINE_int32(num_threads, 1, "Number of threads should be used");
...@@ -190,9 +189,6 @@ TEST(inference, nlp) {
std::unique_ptr<paddle::framework::ProgramDesc> inference_program;
inference_program = InitProgram(&executor, scope.get(), FLAGS_model_path,
/*model combined*/ false);
if (FLAGS_use_mkldnn) {
EnableMKLDNN(inference_program);
}
// always prepare context
std::unique_ptr<paddle::framework::ExecutorPrepareContext> ctx;
ctx = executor.Prepare(*inference_program, 0);
......
...@@ -22,6 +22,8 @@ limitations under the License. */
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/profiler.h"
DECLARE_bool(use_mkldnn);
template <typename T>
void SetupTensor(paddle::framework::LoDTensor* input,
paddle::framework::DDim dims, T lower, T upper) {
...@@ -133,24 +135,11 @@ std::vector<std::vector<int64_t>> GetFeedTargetShapes(
return feed_target_shapes;
}
void EnableMKLDNN(
const std::unique_ptr<paddle::framework::ProgramDesc>& program) {
for (size_t bid = 0; bid < program->Size(); ++bid) {
auto* block = program->MutableBlock(bid);
for (auto* op : block->AllOps()) {
if (op->HasAttr("use_mkldnn")) {
op->SetAttr("use_mkldnn", true);
}
}
}
}
template <typename Place, bool CreateVars = true, bool PrepareContext = false>
void TestInference(const std::string& dirname,
const std::vector<paddle::framework::LoDTensor*>& cpu_feeds,
const std::vector<paddle::framework::LoDTensor*>& cpu_fetchs,
const int repeat = 1, const bool is_combined = false,
const bool use_mkldnn = false) {
const int repeat = 1, const bool is_combined = false) {
// 1. Define place, executor, scope
auto place = Place();
auto executor = paddle::framework::Executor(place);
...@@ -182,9 +171,6 @@ void TestInference(const std::string& dirname,
"init_program",
paddle::platform::DeviceContextPool::Instance().Get(place));
inference_program = InitProgram(&executor, scope, dirname, is_combined);
if (use_mkldnn) {
EnableMKLDNN(inference_program);
}
}
// Disable the profiler and print the timing information
paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kDefault,
...@@ -210,7 +196,10 @@ void TestInference(const std::string& dirname,
fetch_targets[fetch_target_names[i]] = cpu_fetchs[i];
}
// 6. Run the inference program
// 6. If export Flags_use_mkldnn=True, use mkldnn related ops.
if (FLAGS_use_mkldnn) executor.EnableMKLDNN(*inference_program);
// 7. Run the inference program
{
if (!CreateVars) {
// If users don't want to create and destroy variables every time they
......
...@@ -30,7 +30,7 @@ int main(int argc, char** argv) {
new_argv.push_back(
strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory"));
#else
new_argv.push_back(strdup("--tryfromenv=use_pinned_memory"));
new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,use_mkldnn"));
#endif
int new_argc = static_cast<int>(new_argv.size());
char** new_argv_address = new_argv.data();
......
...@@ -116,7 +116,7 @@ def __bootstrap__():
read_env_flags = [
'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
'eager_delete_scope'
'eager_delete_scope', 'use_mkldnn'
]
if core.is_compiled_with_cuda():
read_env_flags += [
......
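A rough sketch of how the read_env_flags list above is assumed to reach the C++ flag (the helper below is illustrative, not Paddle's API; the real call goes through core.init_gflags inside __bootstrap__):

```python
# Illustrative only: flags named in read_env_flags are joined into a single
# --tryfromenv argument, so gflags resolves each of them from a FLAGS_<name>
# environment variable -- the same mechanism the C++ test main() uses above.
import os

read_env_flags = ['use_pinned_memory', 'check_nan_inf', 'benchmark',
                  'warpctc_dir', 'eager_delete_scope', 'use_mkldnn']

def build_gflags_argv(flags):
    # Hypothetical helper mirroring what __bootstrap__ passes to core.init_gflags().
    return ['argv0', '--tryfromenv=' + ','.join(flags)]

os.environ['FLAGS_use_mkldnn'] = 'true'
print(build_gflags_argv(read_env_flags))
# ['argv0', '--tryfromenv=use_pinned_memory,check_nan_inf,benchmark,warpctc_dir,eager_delete_scope,use_mkldnn']
```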