diff --git a/README.md b/README.md index 8d89c6b1ec9e4aefbd64328dedb4e8c7cc50c21b..63abca069a6629ac59739224ded9cd9f06207d0a 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle) [![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/index_en.html) [![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/index_cn.html) -[![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop) [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 9c42044ec163e9db1dd21d5c3915b010c30fdf1c..c4deef6f579a64c911b734699d4e9b221fa578a6 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -96,6 +96,20 @@ if(NOT APPLE AND NOT ANDROID) set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt") endif(NOT APPLE AND NOT ANDROID) +set_property(GLOBAL PROPERTY FLUID_MODULES "") +# find all fluid modules is used for paddle fluid static library +# for building inference libs +function(find_fluid_modules TARGET_NAME) + get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE) + string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path}) + string(FIND "${__target_path}" "fluid" pos) + if(pos GREATER 1) + get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) + set(fluid_modules ${fluid_modules} ${TARGET_NAME}) + set_property(GLOBAL PROPERTY FLUID_MODULES "${fluid_modules}") + endif() +endfunction(find_fluid_modules) + function(merge_static_libs TARGET_NAME) set(libs ${ARGN}) list(REMOVE_DUPLICATES libs) diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index 1231a111f091b3c965a07114ba6496602c16dba8..0c720faa353438b76a72e1574cb90931ddd0cf73 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -12,19 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -set_property(GLOBAL PROPERTY FLUID_MODULES "") -# find all fluid modules is used for paddle fluid static library -function(find_fluid_modules TARGET_NAME) - get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE) - string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path}) - string(FIND "${__target_path}" "fluid" pos) - if(pos GREATER 1) - get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) - set(fluid_modules ${fluid_modules} ${TARGET_NAME}) - set_property(GLOBAL PROPERTY FLUID_MODULES "${fluid_modules}") - endif() -endfunction(find_fluid_modules) - # make package for paddle fluid shared and static library function(copy TARGET) set(options "") diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index bc48fd3b479157d4aea390cd5f4dc61ea46dca4b..cd00b7de7338982308acfa1f1e8c38e010c6a43b 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -147,9 +147,9 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, "Input tensor type is not supported: ", in.type().name()); memory::data_type out_type = in_type; - auto in_format = MKLDNNFormatForSize(in_tz.size(), in.format()); + auto in_format = platform::MKLDNNFormatForSize(in_tz.size(), in.format()); auto out_format = - MKLDNNFormatForSize(in_tz.size(), ToMKLDNNFormat(out_layout)); + platform::MKLDNNFormatForSize(in_tz.size(), ToMKLDNNFormat(out_layout)); void* in_data = GetDataFromTensor(in, in_type); diff --git a/paddle/fluid/framework/data_layout_transform.h b/paddle/fluid/framework/data_layout_transform.h index 67f91e4e48d3e11ed493c5e6943cb9071aff60c4..90bb206ec6b698bc23ad1a5c9609a25186ec6de8 100644 --- a/paddle/fluid/framework/data_layout_transform.h +++ b/paddle/fluid/framework/data_layout_transform.h @@ -62,12 +62,6 @@ inline MKLDNNDataType ToMKLDNNDataType(const std::type_index type) { return MKLDNNDataType::data_undef; } -inline MKLDNNFormat MKLDNNFormatForSize(size_t dims_size, - MKLDNNFormat default_format) { - return (dims_size == 1 - ? mkldnn::memory::format::x - : dims_size == 2 ? mkldnn::memory::format::nc : default_format); -} #endif void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, diff --git a/paddle/fluid/framework/data_transform.cc b/paddle/fluid/framework/data_transform.cc index 635eb404d4a94e9a765f511bb249ce0cce22c477..82872224501709080ff02a13464d58543a0abda8 100644 --- a/paddle/fluid/framework/data_transform.cc +++ b/paddle/fluid/framework/data_transform.cc @@ -18,6 +18,10 @@ limitations under the License. */ #include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/data_type_transform.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + namespace paddle { namespace framework { @@ -48,8 +52,8 @@ void TransformData(const OpKernelType &expected_kernel_type, // Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel // Just set layout/format. No real transform occur - auto out_format = - MKLDNNFormatForSize(in.dims().size(), ToMKLDNNFormat(lin)); + auto out_format = platform::MKLDNNFormatForSize(in.dims().size(), + ToMKLDNNFormat(lin)); out.ShareDataWith(input_tensor); out.set_layout(DataLayout::kMKLDNN); diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index 31516a884baefc2e50b7774a7993740f6930212f..f98011e896f4033ef210e0eb69f93ce7800a3cd6 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -73,18 +73,12 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); } else { - // NOTE(zcd): Because TensorCopy is an async operation, when the src_place - // and dst_place are two different GPU, to ensure that the operation can - // be carried out correctly, we should make ctx wait. - // If ctx_place and src_place are the same, we should add ctx.Wait() - // after memory::Copy; if ctx_place and dst_place are the same, we should - // add ctx.Wait() before memory::Copy. if (platform::is_same_place(ctx_place, src_place)) { memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); - ctx.Wait(); + platform::DeviceContextPool::Instance().Get(src.place())->Wait(); } else if (platform::is_same_place(ctx_place, dst_place)) { - ctx.Wait(); + platform::DeviceContextPool::Instance().Get(src.place())->Wait(); memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); } else { @@ -97,13 +91,6 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, void TensorCopy(const Tensor& src, const platform::Place& dst_place, Tensor* dst) { - // NOTE(zcd): If the src.place() and dst_place are two different GPU, - // the copy operation is carried out on the dst_place's stream. This is - // very important, because TensorCopy is an async operator, and in most - // case, once this copy operator returns, dst is to be used in dst_place's - // stream, if this copy operation is carried out on the src_place's stream, - // when dst is used in dst_place's stream the copy operation may be - // not completed. platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); const platform::DeviceContext* dev_ctx; if (platform::is_gpu_place(dst_place)) { diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h index dca279b69382b80e055f661cefe84b81326704b5..4457382ade37a12f5f3613fc4113fbf1f6f91124 100644 --- a/paddle/fluid/framework/tensor_util.h +++ b/paddle/fluid/framework/tensor_util.h @@ -23,10 +23,25 @@ limitations under the License. */ namespace paddle { namespace framework { +// NOTE(zcd): Because TensorCopy is an async operation, when the src_place +// and dst_place are two different GPU, to ensure that the operation can +// be carried out correctly, there is a src_ctx wait operation in TensorCopy. +// If ctx_place and src_place are the same, src_ctx.Wait() is added +// after memory::Copy; if ctx_place and dst_place are the same, +// src_ctx.Wait() is added before memory::Copy. void TensorCopy(const Tensor& src, const platform::Place& dst_place, const platform::DeviceContext& ctx, Tensor* dst); + +// NOTE(zcd): If the src.place() and dst_place are two different GPU, +// the copy operation is carried out on the dst_place's stream. This is +// very important, because TensorCopy is an async operator, and in most +// case, once this copy operator returns, dst is to be used in dst_place's +// stream, if this copy operation is carried out on the src_place's stream, +// when dst is used in dst_place's stream the copy operation may be +// not completed. void TensorCopy(const Tensor& src, const platform::Place& dst_place, Tensor* dst); + void TensorCopySync(const Tensor& src, const platform::Place& dst_place, Tensor* dst); diff --git a/paddle/fluid/operators/batch_norm_mkldnn_op.cc b/paddle/fluid/operators/batch_norm_mkldnn_op.cc index 6ecb43c49c30f9da2a273d506f7b85c0a4f5fa2c..9ab2179b5fe689762704039c5f67dd080e530aa5 100644 --- a/paddle/fluid/operators/batch_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/batch_norm_mkldnn_op.cc @@ -115,9 +115,12 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel { if (fuse_with_relu) flags |= mkldnn::fuse_bn_relu; // create mkldnn memory from input x tensor - auto src_memory = - memory({{{src_tz}, memory::data_type::f32, x->format()}, mkldnn_engine}, - to_void_cast(x_data)); + mkldnn::memory::format input_format = + platform::MKLDNNFormatForSize(src_tz.size(), x->format()); + + auto src_memory = memory( + {{{src_tz}, memory::data_type::f32, input_format}, mkldnn_engine}, + to_void_cast(x_data)); // create primitive descriptor for batch norm forward using bn_fwd_types = bn_type_traits; @@ -251,15 +254,21 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel { using bn_bwd_types = bn_type_traits; // create mkldnn memory from input diff_y tensor - auto user_diff_dst_memory = - memory({{{diff_dst_tz}, memory::data_type::f32, diff_y->format()}, - mkldnn_engine}, - to_void_cast(diff_y_data)); + + mkldnn::memory::format dst_format = + platform::MKLDNNFormatForSize(src_tz.size(), diff_y->format()); + + auto user_diff_dst_memory = memory( + {{{diff_dst_tz}, memory::data_type::f32, dst_format}, mkldnn_engine}, + to_void_cast(diff_y_data)); // create mkldnn memory from input x tensor - auto src_memory = - memory({{{src_tz}, memory::data_type::f32, x->format()}, mkldnn_engine}, - to_void_cast(x_data)); + mkldnn::memory::format input_format = + platform::MKLDNNFormatForSize(src_tz.size(), x->format()); + + auto src_memory = memory( + {{{src_tz}, memory::data_type::f32, input_format}, mkldnn_engine}, + to_void_cast(x_data)); // for diff_dst, try to use same format as dst in forward pass auto diff_dst_pd = batch_norm_fwd_pd.get()->dst_primitive_desc(); diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index ed99932546446eb877c9701de15e2d37d29b5f88..a6cccc31219104767ac38bdebeb1d4c0e8c2ac01 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -228,7 +228,7 @@ class MKLDNNHandler { return dstr; }; return dims2str(operand_dims) + suffix; - }; + } protected: const MKLDNNDeviceContext& dev_ctx_; @@ -237,5 +237,15 @@ class MKLDNNHandler { bool is_reusing_; }; +inline mkldnn::memory::format MKLDNNFormatForSize( + size_t dims_size, mkldnn::memory::format data_format) { + if (dims_size == 1) { + return mkldnn::memory::format::x; + } else if (dims_size == 2) { + return mkldnn::memory::format::nc; + } + return data_format; +} + } // namespace platform } // namespace paddle