diff --git a/paddle/fluid/framework/new_executor/data_transfer.cc b/paddle/fluid/framework/new_executor/data_transfer.cc
index d9dcef62237e114621ab4f0616c7cca3dcbafc7a..1d0727b80baf7558eb5f391257248a004b1d9f61 100644
--- a/paddle/fluid/framework/new_executor/data_transfer.cc
+++ b/paddle/fluid/framework/new_executor/data_transfer.cc
@@ -24,7 +24,7 @@ bool DataTranferHelper::apply(const OpKernelType& kernel_type_for_var,
                               const std::string& var_name,
                               std::string* new_var_name,
                               std::vector<OpFuncNode>* op_func_nodes,
-                              bool use_local_scope) {
+                              bool use_local_scope, bool is_fetch_v2) {
   bool is_transferred = false;
   auto* src_var_name = &var_name;

@@ -35,8 +35,11 @@ bool DataTranferHelper::apply(const OpKernelType& kernel_type_for_var,
   if (need_layout_transform(kernel_type_for_var, expected_kernel_key)) {
     auto op = TransferLayout(
         *src_var_name, new_var_name, kernel_type_for_var.data_layout_,
-        expected_kernel_key.data_layout_, var_scope_, local_scope);
-    RunAndConstructOpFuncNode(op, *src_var_name, *new_var_name, op_func_nodes);
+        expected_kernel_key.data_layout_, var_scope_, local_scope, is_fetch_v2);
+    if (op) {
+      RunAndConstructOpFuncNode(op, *src_var_name, *new_var_name,
+                                op_func_nodes);
+    }
     // update src_var_name
     src_var_name = new_var_name;
     is_transferred = true;
@@ -46,7 +49,10 @@ bool DataTranferHelper::apply(const OpKernelType& kernel_type_for_var,
     auto op = TransferDtype(
         *src_var_name, new_var_name, kernel_type_for_var.data_type_,
         expected_kernel_key.data_type_, var_scope_, local_scope);
-    RunAndConstructOpFuncNode(op, *src_var_name, *new_var_name, op_func_nodes);
+    if (op) {
+      RunAndConstructOpFuncNode(op, *src_var_name, *new_var_name,
+                                op_func_nodes);
+    }
     // update src_var_name
     src_var_name = new_var_name;
     is_transferred = true;
@@ -55,9 +61,13 @@ bool DataTranferHelper::apply(const OpKernelType& kernel_type_for_var,
   if (need_device_transform(kernel_type_for_var, expected_kernel_key)) {
     auto src_place = kernel_type_for_var.place_;
     auto dst_place = expected_kernel_key.place_;
+
     auto op = TransferDevice(*src_var_name, new_var_name, src_place, dst_place,
                              var_scope_, local_scope);
-    RunAndConstructOpFuncNode(op, *src_var_name, *new_var_name, op_func_nodes);
+    if (op) {
+      RunAndConstructOpFuncNode(op, *src_var_name, *new_var_name,
+                                op_func_nodes);
+    }
     is_transferred = true;
   }
   return is_transferred;
@@ -128,17 +138,44 @@ void DataTranferHelper::RunAndConstructOpFuncNode(
   new_op_func_nodes->emplace_back(std::move(new_op_func_node));
 }

-std::shared_ptr<OperatorBase> TransferLayout(const std::string& var_name,
-                                             std::string* new_var_name,
-                                             DataLayout in_layout,
-                                             DataLayout out_layout,
-                                             VariableScope* var_scope,
-                                             framework::Scope* local_scope) {
+// Var is initialized && var contains tensor && tensor is initialized
+bool IsTensorOfVarInitialized(Variable* var) {
+  if (var->IsInitialized()) {
+    if (var->IsType<LoDTensor>() || var->IsType<phi::SelectedRows>()) {
+      return GetLoDTensorOrSelectedRowsValueFromVar(*var)->IsInitialized();
+    } else if (var->IsType<LoDTensorArray>()) {
+      return static_cast<const Tensor*>(&(var->Get<LoDTensorArray>()[0]))
+          ->IsInitialized();
+    }
+  }
+  return false;
+}
+
+std::shared_ptr<OperatorBase> TransferLayout(
+    const std::string& var_name, std::string* new_var_name,
+    DataLayout in_layout, DataLayout out_layout, VariableScope* var_scope,
+    framework::Scope* local_scope, bool is_fetch_v2) {
+#ifdef PADDLE_WITH_MKLDNN
+  // NOTE(zhiqiu): hot fix, follow the same logic in DataCopy() in fetch_op.cc
+  if (in_layout == framework::DataLayout::kMKLDNN &&
+      var_name == framework::GradVarName("Filter") && is_fetch_v2) {
+    out_layout = framework::DataLayout::kNCHW;
+  }
+#endif
+
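+  // The var name below encodes the in/out layouts instead of a running
+  // counter, so a previously created transfer var can be found and reused.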
   // 1. Generate new_var_name and Initialize it
-  *new_var_name =
-      var_name + "_layout_" + std::to_string(var_scope->VarSize() + 1);
-  auto* ptr = local_scope->Var(*new_var_name);
+  *new_var_name = var_name + "_layout_" +
+                  std::to_string(static_cast<int>(in_layout)) + "_" +
+                  std::to_string(static_cast<int>(out_layout));
+
+  if (var_scope->HasVar(*new_var_name) &&
+      IsTensorOfVarInitialized(var_scope->Var(*new_var_name))) {
+    // already has same var
+    VLOG(4) << "Use cached variable: " << *new_var_name;
+    return nullptr;
+  }
+  auto* ptr = local_scope->Var(*new_var_name);
   auto var_type = var_scope->Var(var_name)->Type();
   InitializeVariable(ptr, static_cast<proto::VarType::Type>(var_type));
   VLOG(3) << "Create Variable " << *new_var_name
@@ -171,10 +208,17 @@ std::shared_ptr<OperatorBase> TransferDtype(const std::string& var_name,
                                             VariableScope* var_scope,
                                             framework::Scope* local_scope) {
   // 1. Generate new_var_name and Initialize it
-  *new_var_name =
-      var_name + "_dtype_" + std::to_string(var_scope->VarSize() + 1);
-  auto* ptr = local_scope->Var(*new_var_name);
+  *new_var_name = var_name + "_dtype_" +
+                  std::to_string(static_cast<int>(in_dtype)) + "_" +
+                  std::to_string(static_cast<int>(out_dtype));
+  if (var_scope->HasVar(*new_var_name) &&
+      IsTensorOfVarInitialized(var_scope->Var(*new_var_name))) {
+    // already has same var
+    VLOG(4) << "Use cached variable: " << *new_var_name;
+    return nullptr;
+  }
+
+  auto* ptr = local_scope->Var(*new_var_name);
   auto var_type = var_scope->Var(var_name)->Type();
   InitializeVariable(ptr, static_cast<proto::VarType::Type>(var_type));
@@ -211,10 +255,17 @@ std::shared_ptr<OperatorBase> TransferDevice(const std::string& var_name,
                                              VariableScope* var_scope,
                                              framework::Scope* local_scope) {
   // 1. Generate new_var_name and Initialize it
-  *new_var_name =
-      var_name + "_device_" + std::to_string(var_scope->VarSize() + 1);
-  auto* ptr = local_scope->Var(*new_var_name);
+  *new_var_name = var_name + "_device_" + src_place.DebugString() + "_" +
+                  dst_place.DebugString();
+
+  if (var_scope->HasVar(*new_var_name) &&
+      IsTensorOfVarInitialized(var_scope->Var(*new_var_name))) {
+    // already has same var
+    VLOG(4) << "Use cached variable: " << *new_var_name;
+    return nullptr;
+  }
+  auto* ptr = local_scope->Var(*new_var_name);
   auto var_type = var_scope->Var(var_name)->Type();
   InitializeVariable(ptr, static_cast<proto::VarType::Type>(var_type));
   VLOG(3) << "Create Variable " << *new_var_name
@@ -258,12 +309,28 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key,
   // record the no need transform variable index.
   std::unordered_set<int> no_data_transform_index;
+
+  const std::unordered_set<std::string>* no_buffer_ins = nullptr;
+  auto& no_buffer_inferer = op_base->Info().NoNeedBufferVarsInferer();
+  if (no_buffer_inferer) {
+    no_buffer_ins = &(no_buffer_inferer(op_base->Inputs(), op_base->Outputs(),
+                                        op_base->Attrs()));
+    if (no_buffer_ins->empty()) {
+      no_buffer_ins = nullptr;
+    }
+  }
+
   DataTranferHelper data_transfer_helper(place, var_scope);
   for (auto& var_name_item : *ins_map_temp) {
+    bool should_skip_input =
+        no_buffer_ins && no_buffer_ins->count(var_name_item.first) > 0;
+
     for (size_t i = 0; i < var_name_item.second.size(); ++i) {
       auto var = var_name_item.second[i];
       auto var_name = new_ins[var_name_item.first].at(i);
       const Tensor* tensor_in;
+      std::string new_var_name;
+      bool is_transferred = false;
+
       if (var->IsType<LoDTensor>() || var->IsType<phi::SelectedRows>()) {
         tensor_in = GetLoDTensorOrSelectedRowsValueFromVar(*var);
       } else if (var->IsType<LoDTensorArray>()) {
@@ -272,18 +339,54 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key,
       } else {
         continue;
       }
+      // special case
       if (!tensor_in->IsInitialized()) {
-        continue;
+        if (should_skip_input == true) {
+#ifdef PADDLE_WITH_MKLDNN
+          // A Var without buffer may still be needed,
+          // e.g. for InferShape().
+          // In this situation we cannot skip the Var analysis, as the
+          // MKL-DNN shape of the Var may differ from the kNHWC Var.
+          // In such a case the corresponding resized Var
+          // has to be created and registered.
+          if ((tensor_in->layout() == DataLayout::kMKLDNN) &&
+              (var->IsType<LoDTensor>() == true) &&
+              (expected_kernel_key.data_layout_ != DataLayout::kMKLDNN) &&
+              (paddle::platform::MKLDNNDeviceContext::tls()
+                   .get_cur_paddle_data_layout() == DataLayout::kNHWC)) {
+            VLOG(7) << "Created reshaped dummy input based on MKL-DNN Tensor, "
+                       "but kNHWC layout "
+                    << var_name_item.first << " in Operator "
+                    << op_base->Type();
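+            // Create the dummy var in the scope the op actually runs in,
+            // so that later InferShape() can see it.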
+            Scope* local_scope = use_local_scope
+                                     ? var_scope->GetMutableLocalScope()
+                                     : var_scope->GetMutableScope();
+            auto op = TransferLayout(
+                var_name, &new_var_name, tensor_in->layout(), DataLayout::kNHWC,
+                var_scope, local_scope, op_base->Type() == "fetch_v2");
+            if (op) {
+              data_transfer_helper.RunAndConstructOpFuncNode(
+                  op, var_name, new_var_name, new_op_func_nodes);
+            }
+            is_transferred = true;
+          } else {
+            VLOG(7) << "Skip scanning input " << var_name_item.first
+                    << " in Operator " << op_base->Type();
+          }
+#endif
+        } else {
+          continue;
+        }
+      } else {
+        auto kernel_type_for_var =
+            static_cast<const framework::OperatorWithKernel*>(op_base)
+                ->GetKernelTypeForVar(var_name_item.first, *tensor_in,
+                                      expected_kernel_key);
+        // apply data transform
+        is_transferred = data_transfer_helper.apply(
+            kernel_type_for_var, expected_kernel_key, var_name, &new_var_name,
+            new_op_func_nodes, use_local_scope, op_base->Type() == "fetch_v2");
       }
-      auto kernel_type_for_var =
-          static_cast<const framework::OperatorWithKernel*>(op_base)
-              ->GetKernelTypeForVar(var_name_item.first, *tensor_in,
-                                    expected_kernel_key);
-      // apply data transform
-      std::string new_var_name;
-      bool is_transferred = data_transfer_helper.apply(
-          kernel_type_for_var, expected_kernel_key, var_name, &new_var_name,
-          new_op_func_nodes, use_local_scope);

       if (is_transferred) {
         // update RuntimeContext.inputs and original op_func_node inputs
diff --git a/paddle/fluid/framework/new_executor/data_transfer.h b/paddle/fluid/framework/new_executor/data_transfer.h
index 1c48018927934cf5987f9bee232ecf9d9ba38bd1..9525ba5bc8f279119204bd2469200c2d973a1f52 100644
--- a/paddle/fluid/framework/new_executor/data_transfer.h
+++ b/paddle/fluid/framework/new_executor/data_transfer.h
@@ -35,7 +35,8 @@ class DataTranferHelper {
   bool apply(const OpKernelType& kernel_type_for_var,
              const OpKernelType& expected_kernel_key,
              const std::string& var_name, std::string* new_var_name,
-             std::vector<OpFuncNode>* new_op_func_nodes, bool use_local_scope);
+             std::vector<OpFuncNode>* new_op_func_nodes, bool use_local_scope,
+             bool is_fetch_v2);

   void RunAndConstructShareNode(const std::string& src_var_name,
                                 const std::string& dst_var_name,
@@ -94,12 +95,10 @@ inline bool need_layout_transform(const OpKernelType& kernel_type_for_var,
                                   expected_kernel_key.data_layout_);
 }

-std::shared_ptr<OperatorBase> TransferLayout(const std::string& var_name,
-                                             std::string* new_var_name,
-                                             DataLayout in_layout,
-                                             DataLayout out_layout,
-                                             VariableScope* var_scope,
-                                             framework::Scope* local_scope);
+std::shared_ptr<OperatorBase> TransferLayout(
+    const std::string& var_name, std::string* new_var_name,
+    DataLayout in_layout, DataLayout out_layout, VariableScope* var_scope,
+    framework::Scope* local_scope, bool is_fetch_v2);

 std::shared_ptr<OperatorBase> TransferDtype(const std::string& var_name,
                                             std::string* new_var_name,
diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc
index d956f23242d4b52c9def4f9a5daa244cc6cb9523..e30dd21fc5c0ed44f526a352303aed1aa94f52ab 100644
--- a/paddle/fluid/framework/new_executor/interpretercore.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore.cc
@@ -22,6 +22,9 @@
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/platform/os_info.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
+#ifdef PADDLE_WITH_MKLDNN
+#include "paddle/fluid/platform/mkldnn_helper.h"
+#endif

 PADDLE_DEFINE_EXPORTED_bool(new_executor_use_inplace, true,
                             "Use inplace in new executor");
@@ -55,6 +58,7 @@ InterpreterCore::InterpreterCore(const platform::Place& place,
       block_(block),
       global_scope_(global_scope),
       stream_analyzer_(place) {
"InterpreterCore(): " << this << " on " << place_; is_build_ = false; async_work_queue_.reset(new interpreter::AsyncWorkQueue( kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_)); @@ -92,6 +96,14 @@ InterpreterCore::~InterpreterCore() { gc_.reset(nullptr); async_work_queue_.reset(nullptr); + VLOG(4) << "~InterpreterCore(): " << this; + VLOG(4) << " on" << place_; + +#ifdef PADDLE_WITH_MKLDNN + // Clear mkl-dnn cache, + // this is needed to have mkl-dnn unit tests working + platform::ClearMKLDNNCache(place_, this); +#endif } void InterpreterCore::SetCopyProgram(std::shared_ptr prog) { @@ -101,6 +113,9 @@ void InterpreterCore::SetCopyProgram(std::shared_ptr prog) { paddle::framework::FetchList InterpreterCore::Run( const std::vector& feed_names, const std::vector& feed_tensors) { +#ifdef PADDLE_WITH_MKLDNN + platform::AttachPointerHashToMKLDNNKey(this, place_); +#endif bool is_build = is_build_; global_scope_->SetLocalScope(local_scope_); Prepare(feed_names, feed_tensors, is_build); @@ -120,6 +135,9 @@ paddle::framework::FetchList InterpreterCore::Run( paddle::framework::FetchList InterpreterCore::Run( const std::vector& feed_names) { +#ifdef PADDLE_WITH_MKLDNN + platform::AttachPointerHashToMKLDNNKey(this, place_); +#endif if (!is_build_) { if (create_local_scope_ && global_scope_->GetMutableLocalScope() != diff --git a/paddle/fluid/framework/new_executor/interpretercore_util.cc b/paddle/fluid/framework/new_executor/interpretercore_util.cc index b9470cd3736d1e95d19e7414920bfb471212c542..d56082a91a61f8aa28f679e482760eb777b07dbc 100644 --- a/paddle/fluid/framework/new_executor/interpretercore_util.cc +++ b/paddle/fluid/framework/new_executor/interpretercore_util.cc @@ -21,6 +21,10 @@ #include "paddle/fluid/operators/controlflow/while_op_helper.h" #include "paddle/phi/core/kernel_factory.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + PADDLE_DEFINE_EXPORTED_bool( new_executor_sequential_run, false, "Enable sequential execution for standalone executor, used for debug"); @@ -312,6 +316,10 @@ void build_op_func_list(const platform::Place& place, operators::PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp( main_program, block.ID(), ops_unique); +#ifdef PADDLE_WITH_MKLDNN + platform::RegisterModelLayout(ops_unique, place); +#endif + // its elements will be moved to vec_func_list std::vector> ops; for (auto& op_unique : ops_unique) { diff --git a/paddle/fluid/framework/new_executor/standalone_executor.cc b/paddle/fluid/framework/new_executor/standalone_executor.cc index 50770b6c4a7fafc7fc15a5f352dd60c3d3dcd4d9..a22502314759f2075fd51811738d08fa4d924072 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor.cc +++ b/paddle/fluid/framework/new_executor/standalone_executor.cc @@ -112,7 +112,8 @@ std::shared_ptr StandaloneExecutor::GetInterpreterCore( auto iter = interpretercores_.find(oss.str()); if (iter == interpretercores_.end()) { - VLOG(3) << "create interpreter_core for " << oss.str(); + VLOG(3) << "create interpreter_core for " << oss.str() << " on place " + << place_; VLOG(3) << "add fetch op: " << add_fetch_op; std::shared_ptr core = nullptr; if (add_fetch_op) { diff --git a/paddle/fluid/framework/new_executor/standalone_executor.h b/paddle/fluid/framework/new_executor/standalone_executor.h index e84df2abb36d99f3ccfc49d41e044fa4fe173018..0b9e348ab7644a0ccd1c5d14c121a1931f80396a 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor.h +++ b/paddle/fluid/framework/new_executor/standalone_executor.h @@ -63,7 +63,7 
@@ -63,7 +63,7 @@ class StandaloneExecutor : public ExecutorBase {
       const std::vector<std::string>& feed_names,
       const std::vector<std::string>& fetch_names, bool add_fetch_op);

-  const platform::Place& place_;
+  platform::Place place_;
   const ProgramDesc& startup_prog_;
   const ProgramDesc& main_prog_;
   VariableScope global_scope_;
diff --git a/paddle/fluid/operators/controlflow/fetch_op.cc b/paddle/fluid/operators/controlflow/fetch_op.cc
index de3d8bd996149f92ed24be63fadacfc51c2764b0..111ca9c63c634634d30de11186e5f8b0a4ef7f0b 100644
--- a/paddle/fluid/operators/controlflow/fetch_op.cc
+++ b/paddle/fluid/operators/controlflow/fetch_op.cc
@@ -33,6 +33,7 @@ static void DataCopy(const framework::LoDTensor &src_item,
       framework::Tensor out;
       // Convert to desired Paddle layout, apart from grads of filter
       // as params are not a subject to paddle's data_format
+      VLOG(4) << "innerTransDataLayoutFromMKLDNN";
       framework::innerTransDataLayoutFromMKLDNN(
           src_item.layout(),
           fetch_var_name == framework::GradVarName("Filter")
               ? framework::DataLayout::kNCHW
diff --git a/paddle/fluid/operators/transfer_layout_op.cc b/paddle/fluid/operators/transfer_layout_op.cc
index 96946ee15f41a5d70484562ec2cb490e0ed681c0..f26bcdca4a7b316115aa0988eef4f9b861d5186f 100644
--- a/paddle/fluid/operators/transfer_layout_op.cc
+++ b/paddle/fluid/operators/transfer_layout_op.cc
@@ -67,19 +67,25 @@ class TransferLayoutOp : public framework::OperatorWithKernel {
     // kernel's device type is decided by input tensor place
     auto *in = ctx.InputVar("X");
     auto *in_tensor = framework::GetLoDTensorOrSelectedRowsValueFromVar(*in);
-    PADDLE_ENFORCE_EQ(in_tensor->IsInitialized(), true,
-                      platform::errors::PreconditionNotMet(
-                          "The tensor of Input(X) is not initialized."));
+    // NOTE(zhiqiu): hot fix, allow empty tensor of kMKLDNN layout to run this
+    // op
+    if (in_tensor->layout() != DataLayout::kMKLDNN) {
+      PADDLE_ENFORCE_EQ(in_tensor->IsInitialized(), true,
+                        platform::errors::PreconditionNotMet(
+                            "The tensor of Input(X) is not initialized."));
+    }
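+    // An uninitialized tensor carries no valid place, so fall back to
+    // CPUPlace.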
+    auto place =
+        in_tensor->IsInitialized() ? in_tensor->place() : platform::CPUPlace();
+
     // dtype is not important
-    return framework::OpKernelType(framework::proto::VarType::FP32,
-                                   in_tensor->place());
+    return framework::OpKernelType(framework::proto::VarType::FP32, place);
   }

   framework::OpKernelType GetKernelTypeForVar(
       const std::string &var_name, const framework::Tensor &tensor,
       const framework::OpKernelType &expected_kernel_type) const override {
     return framework::OpKernelType(expected_kernel_type.data_type_,
-                                   tensor.place(),
+                                   expected_kernel_type.place_,
                                    expected_kernel_type.data_layout_);
   }
 };
@@ -99,7 +105,9 @@ class TransferLayoutKernel {
     auto &dev_ctx = ctx.device_context();
     auto src_layout = ctx.Attr<int>("src_layout");
     auto dst_layout = ctx.Attr<int>("dst_layout");
-    TransferLayoutFunctor(x, out, dev_ctx, src_layout, dst_layout)();
+    auto input_name = ctx.InputName("X");
+    TransferLayoutFunctor(x, out, dev_ctx, src_layout, dst_layout,
+                          input_name)();
   }
 };
diff --git a/paddle/fluid/operators/transfer_layout_op.h b/paddle/fluid/operators/transfer_layout_op.h
index 06bf54e998cc2f080f828c0a723f8e6631eb70e2..c90a44dc49445b6301bdf09e7437d8efa5c5daef 100644
--- a/paddle/fluid/operators/transfer_layout_op.h
+++ b/paddle/fluid/operators/transfer_layout_op.h
@@ -39,12 +39,14 @@ class TransferLayoutFunctor {
  public:
   TransferLayoutFunctor(const framework::Variable *in, framework::Variable *out,
                         const platform::DeviceContext &dev_ctx,
-                        const int src_layout, const int dst_layout)
+                        const int src_layout, const int dst_layout,
+                        std::string in_name)
       : in_(in),
         out_(out),
         dev_ctx_(dev_ctx),
         src_layout_(src_layout),
-        dst_layout_(dst_layout) {}
+        dst_layout_(dst_layout),
+        in_name_(in_name) {}

   void operator()() const {
     auto &in_tensor = *framework::GetLoDTensorOrSelectedRowsValueFromVar(*in_);
@@ -54,8 +56,18 @@ class TransferLayoutFunctor {
     out_tensor.set_layout(out_layout);

 #ifdef PADDLE_WITH_MKLDNN
+    // NOTE(zhiqiu): to handle the special case in ApplyDataTransform() in
+    // data_transfer.cc
     auto in_layout = static_cast<DataLayout>(src_layout_);
+    auto *tensor_out = out_->GetMutable<framework::LoDTensor>();
     VLOG(4) << in_layout << "->" << out_layout << " " << in_tensor.layout();
+    if (!in_tensor.IsInitialized() && in_layout == DataLayout::kMKLDNN &&
+        out_layout == DataLayout::kNHWC) {
+      tensor_out->Resize(in_tensor.dims());
+      tensor_out->set_layout(out_layout);
+      platform::MatchShapeToLayout(tensor_out, in_layout, out_layout);
+      return;
+    }
     if (in_layout == DataLayout::kMKLDNN || out_layout == DataLayout::kMKLDNN) {
       PADDLE_ENFORCE_NE(
           in_layout, out_layout,
@@ -81,13 +93,21 @@ class TransferLayoutFunctor {
         out_tensor.set_layout(DataLayout::kMKLDNN);
         out_tensor.set_format(out_format);
       } else {
-        VLOG(4) << "kNCHW";
+        auto target_layout = paddle::platform::MKLDNNDeviceContext::tls()
+                                 .get_cur_paddle_data_layout();
+        // NOTE(zhiqiu): hot fix, follow the same logic in DataCopy() in
+        // fetch_op.cc
+        if (out_layout == DataLayout::kNCHW &&
+            in_name_ == framework::GradVarName("Filter")) {
+          target_layout = out_layout;
+        }
+        VLOG(4) << "innerTransDataLayoutFromMKLDNN: " << in_layout << "->"
+                << target_layout;
         // Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel
         // Do transform via MKLDNN lib
         paddle::framework::innerTransDataLayoutFromMKLDNN(
-            in_layout, paddle::platform::MKLDNNDeviceContext::tls()
-                           .get_cur_paddle_data_layout(),
-            in_tensor, &out_tensor, dev_ctx_.GetPlace());
+            in_layout, target_layout, in_tensor, &out_tensor,
+            dev_ctx_.GetPlace());
       }
     } else {
       // Case3 - transfrom between Non-MKLDNN OPKernels
@@ -132,6 +152,7 @@
   const platform::DeviceContext &dev_ctx_;
   const int src_layout_;
   const int dst_layout_;
+  std::string in_name_;
 };

 }  // namespace operators
diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc
index 9aa362546ec23e978fff618c3b5d07796e02aaf0..5ee54b1c865287a23bd3399ebac488a5db150145 100644
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@@ -742,6 +742,7 @@ dnnl::stream& MKLDNNDeviceContextThreadLocals::Body::get_stream(void) {
 }

 void MKLDNNDeviceContext::ResetBlobMap(void* ptr) {
+  VLOG(4) << tls().get_curr_exec() << " " << ptr;
   std::lock_guard<std::mutex> lock(*p_mutex_);
   if (!block_next_cache_clearing_) {
     VLOG(3) << "Clearing DNNL cache.";
diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h
index 4001fd744e67784d736cb157743e4b4d7fa4517e..d2e48c1113860e1b1ea36057582c609610498699 100644
--- a/paddle/fluid/platform/mkldnn_helper.h
+++ b/paddle/fluid/platform/mkldnn_helper.h
@@ -563,6 +563,7 @@ inline void RegisterModelLayout(
     std::vector<std::unique_ptr<framework::OperatorBase>>& ops,
     const platform::Place& place) {
   if (platform::is_cpu_place(place)) {
+    VLOG(4) << "RegisterModelLayout for mkldnn";
     auto check_attrib = [](std::unique_ptr<framework::OperatorBase>& op,
                            const std::string& attrib_name) -> bool {
       if (op->HasAttr(attrib_name)) {