Unverified commit 62d44836, authored by arlesniak, committed by GitHub

Added verbose oneDNN lib version (#29378)

Parent: ff6a1450
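For context before the diff: `dnnl::version()` is part of the oneDNN (formerly MKL-DNN/DNNL) C++ API and returns a pointer to a struct describing the library that was linked in. A minimal standalone sketch of the same query this patch performs, assuming the oneDNN headers and library are installed (compile with `-ldnnl`; the header path may differ across oneDNN releases):

```cpp
// Standalone sketch, not part of the patch: print the linked oneDNN version.
#include <iostream>

#include "dnnl.hpp"  // oneDNN C++ API header; provides dnnl::version()

int main() {
  // dnnl::version() returns a pointer to a dnnl_version_t struct whose
  // major/minor/patch fields describe the library linked at runtime.
  const dnnl_version_t* dv = dnnl::version();
  std::cout << "oneDNN v" << dv->major << "." << dv->minor << "."
            << dv->patch << std::endl;
  return 0;
}
```

This is exactly the information the patch routes through `LOG(INFO)` once per thread, as the hunks below show.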
@@ -144,6 +144,7 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         platform::errors::InvalidArgument(
             "The axis is expected to be in range of [%d, %d), but got %d",
             -rank, rank, concat_axis));
+    platform::MKLDNNDeviceContext::tls().log_lib_version();
     if (concat_axis < 0) {
       concat_axis = concat_axis + rank;
     }
@@ -572,6 +572,7 @@ class FCMKLDNNOpKernel : public framework::OpKernel<T_in> {
     PADDLE_ENFORCE_EQ(
         platform::is_cpu_place(ctx.GetPlace()), true,
         platform::errors::PreconditionNotMet("FC MKL-DNN must use CPUPlace."));
+    platform::MKLDNNDeviceContext::tls().log_lib_version();
     auto input = ctx.Input<LoDTensor>("Input");
     auto w = ctx.Input<Tensor>("W");
     auto bias = ctx.Input<Tensor>("Bias");
@@ -378,6 +378,7 @@ class DNNLMatMulKernel : public framework::OpKernel<T> {
           platform::errors::Unimplemented(
               "DNNL matmul doesn't support multiple heads."));
     }
+    platform::MKLDNNDeviceContext::tls().log_lib_version();
     ExecuteMatMul<T, T>(ctx);
   }
 };
@@ -353,6 +353,7 @@ class MulMKLDNNKernel : public framework::OpKernel<XT> {
     PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()), true,
                       paddle::platform::errors::PreconditionNotMet(
                           "Operator DNNL Mul must use CPUPlace"));
+    platform::MKLDNNDeviceContext::tls().log_lib_version();
     auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
     const auto &mkldnn_engine = dev_ctx.GetEngine();
@@ -466,6 +466,15 @@ MKLDNNDeviceContextThreadLocals::Body::get_cur_paddle_data_layout(void) {
   return cur_paddle_data_layout;
 }
 
+void MKLDNNDeviceContextThreadLocals::Body::log_lib_version(void) {
+  if (!said_once) {
+    said_once = true;
+    auto dv = dnnl::version();
+    LOG(INFO) << "oneDNN v" << dv->major << "." << dv->minor << "."
+              << dv->patch;
+  }
+}
+
 void MKLDNNDeviceContext::ResetBlobMap() {
   std::lock_guard<decltype(*p_mutex_)> lock(*p_mutex_);
   if (!block_next_cache_clearing_) {
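An aside on the guard above: `Body` is held in thread-local storage (reached through `MKLDNNDeviceContext::tls()`), so the plain `bool said_once` needs no synchronization, and the version line is emitted at most once per thread rather than once per process. A minimal sketch of the same pattern, with hypothetical stand-in names (`Body` and `tls` here are illustrative, not the Paddle types):

```cpp
// Sketch of the once-per-thread logging guard used by log_lib_version().
#include <iostream>

struct Body {
  bool said_once = false;  // plain bool is safe: each thread owns its Body
  void log_lib_version() {
    if (!said_once) {
      said_once = true;
      std::cout << "version logged for this thread\n";
    }
  }
};

Body& tls() {
  thread_local Body body;  // one independent Body instance per thread
  return body;
}

int main() {
  tls().log_lib_version();  // prints
  tls().log_lib_version();  // silent: said_once already set in this thread
  return 0;
}
```

A new thread would print again, which matches the intent: each worker thread that touches an MKL-DNN kernel or handler reports the library version once.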
@@ -466,6 +466,7 @@ class MKLDNNDeviceContextThreadLocals {
   typedef MKLDNNDeviceContextThreadLocals self;
   struct Body {
+    bool said_once = false;
     size_t cur_mkldnn_session_id;
     // Current data input shape string.
     // - For fixed-shape, it's a null string in default.
@@ -485,6 +486,7 @@ class MKLDNNDeviceContextThreadLocals {
     void set_cur_input_shape_cache_capacity(int input_shape_cache_capacity);
     void set_cur_paddle_data_layout(framework::DataLayout dl);
     framework::DataLayout get_cur_paddle_data_layout(void);
+    void log_lib_version(void);
   };
 
   MKLDNNDeviceContextThreadLocals() = default;
   MKLDNNDeviceContextThreadLocals(const MKLDNNDeviceContextThreadLocals& c) =
@@ -45,7 +45,9 @@ class MKLDNNHandlerT {
         key_common_(base_key),
         key_(platform::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, base_key)),
         fwd_pd_(nullptr),
-        bwd_pd_(nullptr) {}
+        bwd_pd_(nullptr) {
+    platform::MKLDNNDeviceContext::tls().log_lib_version();
+  }
 
   std::shared_ptr<TForward> AcquireForwardPrimitive() {
     const std::string key_p = key_ + "@fwd_p";
@@ -313,7 +315,9 @@ class MKLDNNHandler {
       : dev_ctx_(dev_ctx),
         engine_(engine),
         key_common_(base_key),
-        key_(platform::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, base_key)) {}
+        key_(platform::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, base_key)) {
+    platform::MKLDNNDeviceContext::tls().log_lib_version();
+  }
 
   std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
       const mkldnn::memory::desc& md, void* ptr) {