diff --git a/Dockerfile b/Dockerfile
index 164fe84904947bfc3cf71132b5fba04744460b26..ea39efd00bb5c0a7deb3f6d57083d83a673b883c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -70,7 +70,7 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
 # specify sphinx version as 1.5.6 and remove -U option for [pip install -U
 # sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
 # version(1.7.1 for now), which causes building documentation failed.
-RUN pip install --upgrade pip==9.0.3 && \
+RUN easy_install -U pip && \
     pip install -U wheel && \
     pip install -U docopt PyYAML sphinx==1.5.6 && \
     pip install sphinx-rtd-theme==0.1.9 recommonmark
diff --git a/cmake/external/boost.cmake b/cmake/external/boost.cmake
index 10662fc96704685f030a5d76c6857d4bc20a63d9..499682f644d60c16c3025870e7dd2a890630a2bb 100644
--- a/cmake/external/boost.cmake
+++ b/cmake/external/boost.cmake
@@ -24,7 +24,7 @@ set(BOOST_PROJECT "extern_boost")
 # So we use 1.41.0 here.
 set(BOOST_VER "1.41.0")
 set(BOOST_TAR "boost_1_41_0")
-set(BOOST_URL "http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz")
+set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz")
 set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost)
 set(BOOST_DOWNLOAD_DIR "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}")
 set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}/${BOOST_TAR}" CACHE PATH "boost include directory." FORCE)
diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake
index 0332e39d14200da1c1af52675f0ccad2c07de405..25c07850dda7b2f69c2207c37b9d2368632104ec 100644
--- a/cmake/external/mkldnn.cmake
+++ b/cmake/external/mkldnn.cmake
@@ -53,11 +53,9 @@ ExternalProject_Add(
     ${EXTERNAL_PROJECT_LOG_ARGS}
     DEPENDS ${MKLDNN_DEPENDS}
     GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git"
-    GIT_TAG "v0.14"
+    GIT_TAG "db3424ad44901513c03a1ea31ccaacdf633fbe9f"
     PREFIX ${MKLDNN_SOURCES_DIR}
     UPDATE_COMMAND ""
-    # Patch MKLDNN to compile with gcc 4.8, the related issue is in intel/mkl-dnn#237.
- PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/patches/mkldnn.hpp ${MKLDNN_SOURCES_DIR}/src/extern_mkldnn/include/mkldnn.hpp CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} CMAKE_ARGS -DMKLROOT=${MKLML_ROOT} diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index cebde2750444c4085ebb7046e8a175cc1cc104d1..e9a37b52e61b2525b047352cc70510df83eccb7f 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -28,7 +28,7 @@ INCLUDE(ExternalProject) SET(MKLML_PROJECT "extern_mklml") SET(MKLML_VER "mklml_lnx_2018.0.3.20180406") -SET(MKLML_URL "http://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.tgz") +SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz") SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}") SET(MKLML_DST_DIR "mklml") diff --git a/doc/fluid/howto/optimization/cpu_profiling_cn.md b/doc/fluid/howto/optimization/cpu_profiling_cn.md index 8266dec3c6125a09b90ac0ccd4aa5464f5c7db31..198a05a79e19227e90eaafe116217a164cd51a7d 100644 --- a/doc/fluid/howto/optimization/cpu_profiling_cn.md +++ b/doc/fluid/howto/optimization/cpu_profiling_cn.md @@ -1,3 +1,5 @@ +# CPU性能调优 + 此教程会介绍如何使用Python的cProfile包、Python库yep、Google perftools来进行性能分析 (profiling) 与调优(performance tuning)。 Profling 指发现性能瓶颈。系统中的瓶颈可能和程序员开发过程中想象的瓶颈相去甚远。Tuning 指消除瓶颈。性能优化的过程通常是不断重复地 profiling 和 tuning。 @@ -8,7 +10,7 @@ PaddlePaddle 用户一般通过调用 Python API 编写深度学习程序。大 * Python 与 C++ 混合代码的性能分析 -# Python代码的性能分析 +## Python代码的性能分析 ### 生成性能分析文件 diff --git a/doc/fluid/howto/optimization/cpu_profiling_en.md b/doc/fluid/howto/optimization/cpu_profiling_en.md index e95556dd608b7ff0a3eb18873df0015a2da94e7c..216694965b3c878a8a5f3ccd2a0cba8d21d9ce05 100644 --- a/doc/fluid/howto/optimization/cpu_profiling_en.md +++ b/doc/fluid/howto/optimization/cpu_profiling_en.md @@ -1,3 +1,5 @@ +# Tune CPU performance + This tutorial introduces techniques we use to profile and tune the CPU performance of PaddlePaddle. We will use Python packages `cProfile` and `yep`, and Google's `perftools`. @@ -14,7 +16,7 @@ the profiling and tuning of 1. the Python code and 1. the mixture of Python and C++ code. 
-# Profiling the Python Code +## Profiling the Python Code ### Generate the Performance Profiling File diff --git a/doc/v2/build_and_install/pip_install_cn.rst b/doc/v2/build_and_install/pip_install_cn.rst index b3d882743785e8ee301b71b696230531d2b7ba58..aa1dc6ee2cc9a3a528e54ce2da07746158735f56 100644 --- a/doc/v2/build_and_install/pip_install_cn.rst +++ b/doc/v2/build_and_install/pip_install_cn.rst @@ -37,12 +37,11 @@ PaddlePaddle可以使用常用的Python包管理工具 :header: "版本说明", "cp27-cp27mu", "cp27-cp27m" :widths: 1, 3, 3 - "cpu_avx_mkl", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl `_" - "cpu_avx_openblas", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl `_" - "cpu_noavx_openblas", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl `_" - "cuda7.5_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl `_" - "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl `_" - "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl `_" + "cpu_avx_mkl", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_" + "cpu_avx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_" + "cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_" + "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `_" + "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `_" .. 
_pip_dependency: diff --git a/doc/v2/build_and_install/pip_install_en.rst b/doc/v2/build_and_install/pip_install_en.rst index 1e409d86b9775094998f72f92954f4bbc1013ea1..a70821eb487be841060e6b5f7fc8b014634ac5ba 100644 --- a/doc/v2/build_and_install/pip_install_en.rst +++ b/doc/v2/build_and_install/pip_install_en.rst @@ -40,12 +40,11 @@ If the links below shows up the login form, just click "Log in as guest" to star :header: "version", "cp27-cp27mu", "cp27-cp27m" :widths: 1, 3, 3 - "cpu_avx_mkl", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl `_" - "cpu_avx_openblas", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl `_" - "cpu_noavx_openblas", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl `_" - "cuda7.5_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl `_" - "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl `_" - "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl `_" + "cpu_avx_mkl", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_" + "cpu_avx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_" + "cpu_noavx_openblas", "`paddlepaddle-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-latest-cp27-cp27m-linux_x86_64.whl `_" + "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `_" + "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-latest-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle_gpu-latest-cp27-cp27m-linux_x86_64.whl `_" .. 
_pip_dependency: diff --git a/paddle/fluid/framework/data_device_transform.cc b/paddle/fluid/framework/data_device_transform.cc index 85dbb39e6fba735471446b5e5e71a612282c498a..a876725ac0f17838458065c4b4753a03e2812801 100644 --- a/paddle/fluid/framework/data_device_transform.cc +++ b/paddle/fluid/framework/data_device_transform.cc @@ -36,9 +36,11 @@ void TransDataDevice(const Tensor& in, const platform::Place& dst_place, VLOG(3) << "DeviceTransform in, src_place " << in.place() << " dst_place: " << dst_place; auto* dev_ctx = GetDeviceContext(in.place(), dst_place); - dev_ctx->Wait(); + TensorCopy(in, dst_place, *dev_ctx, out); - dev_ctx->Wait(); + if (platform::is_gpu_place(in.place()) && platform::is_cpu_place(dst_place)) { + dev_ctx->Wait(); + } } } // namespace framework diff --git a/paddle/fluid/framework/data_type_transform.cc b/paddle/fluid/framework/data_type_transform.cc index c0523f3c795b103c0c27081ec5dc717f6a0f11e0..5a57ec20585c26dbcd4251464718fc819148a7a5 100644 --- a/paddle/fluid/framework/data_type_transform.cc +++ b/paddle/fluid/framework/data_type_transform.cc @@ -91,6 +91,12 @@ void TransDataType(const OpKernelType& kernel_type_for_var, case proto::VarType::BOOL: framework::VisitDataType(dst_type, CastDataType(in, out, ctx)); break; + case proto::VarType::INT16: + framework::VisitDataType(dst_type, CastDataType(in, out, ctx)); + break; + case proto::VarType::UINT8: + framework::VisitDataType(dst_type, CastDataType(in, out, ctx)); + break; default: PADDLE_THROW("Not support type %d", src_type); } diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index ce91d7a82674364560b8065277b28b51ae1b303a..4e431561f81b2a84c06dff9fcb041317ebc84ae3 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -228,7 +228,8 @@ static bool has_fetch_operators( void Executor::Run(const ProgramDesc& program, Scope* scope, std::map* feed_targets, std::map* fetch_targets, - bool create_vars, const std::string& feed_holder_name, + bool create_local_scope, bool create_vars, + const std::string& feed_holder_name, const std::string& fetch_holder_name) { platform::RecordBlock b(kProgramId); bool has_feed_ops = @@ -290,8 +291,9 @@ void Executor::Run(const ProgramDesc& program, Scope* scope, } auto ctx = Prepare(*copy_program, 0); - RunPreparedContext(ctx.get(), scope, feed_targets, fetch_targets, create_vars, - feed_holder_name, fetch_holder_name); + RunPreparedContext(ctx.get(), scope, feed_targets, fetch_targets, + create_local_scope, create_vars, feed_holder_name, + fetch_holder_name); } std::unique_ptr Executor::Prepare( @@ -366,8 +368,9 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, void Executor::RunPreparedContext( ExecutorPrepareContext* ctx, Scope* scope, std::map* feed_targets, - std::map* fetch_targets, bool create_vars, - const std::string& feed_holder_name, const std::string& fetch_holder_name) { + std::map* fetch_targets, bool create_local_scope, + bool create_vars, const std::string& feed_holder_name, + const std::string& fetch_holder_name) { auto& global_block = ctx->prog_.Block(ctx->block_id_); PADDLE_ENFORCE( @@ -387,7 +390,7 @@ void Executor::RunPreparedContext( } } - RunPreparedContext(ctx, scope, create_vars, create_vars); + RunPreparedContext(ctx, scope, create_local_scope, create_vars); // obtain the data of fetch_targets from fetch_holder for (auto* op : global_block.AllOps()) { diff --git a/paddle/fluid/framework/executor.h b/paddle/fluid/framework/executor.h index 
4a3d637e2d79f8cbd83412eea2d73e4b497ef1e7..0c3c23611d95e0da67cabfb8fb2755a4a52c991b 100644 --- a/paddle/fluid/framework/executor.h +++ b/paddle/fluid/framework/executor.h @@ -57,7 +57,7 @@ class Executor { void Run(const ProgramDesc& program, Scope* scope, std::map* feed_targets, std::map* fetch_targets, - bool create_vars = true, + bool create_local_scope = true, bool create_vars = true, const std::string& feed_holder_name = "feed", const std::string& fetch_holder_name = "fetch"); @@ -76,6 +76,7 @@ class Executor { void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, std::map* feed_targets, std::map* fetch_targets, + bool create_local_scope = true, bool create_vars = true, const std::string& feed_holder_name = "feed", const std::string& fetch_holder_name = "fetch"); diff --git a/paddle/fluid/inference/tests/test_helper.h b/paddle/fluid/inference/tests/test_helper.h index b02e5c99f00eaf03c3753e43575cbc67e834774e..cc1589514aab3b973b4909159748bc4223cdce46 100644 --- a/paddle/fluid/inference/tests/test_helper.h +++ b/paddle/fluid/inference/tests/test_helper.h @@ -149,7 +149,7 @@ void TestInference(const std::string& dirname, state = paddle::platform::ProfilerState::kCPU; } else { #ifdef PADDLE_WITH_CUDA - state = paddle::platform::ProfilerState::kCUDA; + state = paddle::platform::ProfilerState::kAll; // The default device_id of paddle::platform::CUDAPlace is 0. // Users can get the device_id using: // int device_id = place.GetDeviceId(); @@ -172,7 +172,7 @@ void TestInference(const std::string& dirname, } // Disable the profiler and print the timing information paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kDefault, - "load_program_profiler.txt"); + "load_program_profiler"); paddle::platform::ResetProfiler(); // 3. Get the feed_target_names and fetch_target_names @@ -208,10 +208,10 @@ void TestInference(const std::string& dirname, if (PrepareContext) { ctx = executor.Prepare(*inference_program, 0); executor.RunPreparedContext(ctx.get(), scope, &feed_targets, - &fetch_targets, CreateVars); + &fetch_targets, true, CreateVars); } else { executor.Run(*inference_program, scope, &feed_targets, &fetch_targets, - CreateVars); + true, CreateVars); } // Enable the profiler @@ -236,8 +236,7 @@ void TestInference(const std::string& dirname, // Disable the profiler and print the timing information paddle::platform::DisableProfiler( - paddle::platform::EventSortingKey::kDefault, - "run_inference_profiler.txt"); + paddle::platform::EventSortingKey::kDefault, "run_inference_profiler"); paddle::platform::ResetProfiler(); } diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index ace6b083e93140fd5dda4032349f486bd0a9ccbf..ee6ea7141e5a856b00b1f7254b22816b0b2cf362 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -205,8 +205,9 @@ if(WITH_DISTRIBUTE) set_source_files_properties(fetch_barrier_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties(send_recv_op_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS prefetch_op send_op listen_and_serv_op sum_op executor) - cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS send_op listen_and_serv_op executor) if(WITH_GPU) + set_source_files_properties(test_send_nccl_id.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS send_op listen_and_serv_op executor) op_library(gen_nccl_id_op 
DEPS nccl_common sendrecvop_grpc) set_source_files_properties(gen_nccl_id_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) else() diff --git a/paddle/fluid/operators/beam_search_op.h b/paddle/fluid/operators/beam_search_op.h index 9b51db8a45186c2a90cf8b2eb7966d0aaea04028..46bc4f6f936929050276e8b3b93f1eddd62ac638 100644 --- a/paddle/fluid/operators/beam_search_op.h +++ b/paddle/fluid/operators/beam_search_op.h @@ -14,10 +14,6 @@ limitations under the License. */ #pragma once -#ifdef PADDLE_WITH_TESTING -#include "gtest/gtest.h" -#endif - #include #include #include "paddle/fluid/framework/lod_tensor.h" diff --git a/paddle/fluid/operators/detail/grpc_server.cc b/paddle/fluid/operators/detail/grpc_server.cc index d09f8479b765ad26cc202bfdb2692828213c7956..eb114a47d99541402f748bfffcf6b10fde3e78e2 100644 --- a/paddle/fluid/operators/detail/grpc_server.cc +++ b/paddle/fluid/operators/detail/grpc_server.cc @@ -184,7 +184,7 @@ class RequestPrefetch final : public RequestBase { framework::Scope* local_scope = &scope_->NewScope(); auto* var = local_scope->FindVar(var_name); InitializeVariable(var, var_desc->GetType()); - executor_->RunPreparedContext(prefetch_ctx_, scope_, false, false); + executor_->RunPreparedContext(prefetch_ctx_, scope_); SerializeToByteBuffer(var_name, var, *dev_ctx_, &reply); diff --git a/paddle/fluid/operators/is_empty_op.cc b/paddle/fluid/operators/is_empty_op.cc index d3f3ad92442cafdd8d4cdc396d89721863d069c2..29b73951bbddd9bfd73c932d7801797590de5e8e 100644 --- a/paddle/fluid/operators/is_empty_op.cc +++ b/paddle/fluid/operators/is_empty_op.cc @@ -12,45 +12,41 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
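The executor changes above add a `create_local_scope` flag to `Executor::Run` and `Executor::RunPreparedContext`, and the server-side call sites now rely on the defaults instead of passing two bare booleans. A minimal sketch of what such a flag controls, assuming the usual executor behaviour (run in a throw-away child scope when the flag is true, reuse the caller's scope when it is false); `Scope`, `RunBlock` and `RunPreparedContext` here are simplified stand-ins, not Paddle's real classes:

```cpp
// Illustrative only: tiny stand-ins for a variable scope and "run a block".
// Shows the intent of a create_local_scope flag: when true, work happens in
// a throw-away child scope; when false, the caller's scope is reused so its
// variables persist across calls.
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct Scope {
  std::map<std::string, int> vars;
  std::unique_ptr<Scope> child;
  Scope* NewScope() {
    child.reset(new Scope());
    return child.get();
  }
};

void RunBlock(Scope* scope) { scope->vars["tmp"] = 42; }  // pretend op execution

void RunPreparedContext(Scope* scope, bool create_local_scope = true) {
  Scope* exec_scope = create_local_scope ? scope->NewScope() : scope;
  RunBlock(exec_scope);
  if (create_local_scope) scope->child.reset();  // drop the temporaries
}

int main() {
  Scope persistent;
  RunPreparedContext(&persistent);         // defaults: temporaries discarded
  RunPreparedContext(&persistent, false);  // reuse scope: "tmp" survives
  std::cout << persistent.vars.count("tmp") << "\n";  // prints 1
}
```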
*/ +#include "paddle/fluid/operators/is_empty_op.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" namespace paddle { namespace operators { -constexpr char kInput[] = "X"; -constexpr char kOutput[] = "Out"; - -class IsEmptyOp : public framework::OperatorBase { +class IsEmptyOp : public framework::OperatorWithKernel { public: - IsEmptyOp(const std::string &type, const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorBase(type, inputs, outputs, attrs) {} + using framework::OperatorWithKernel::OperatorWithKernel; - private: - void RunImpl(const framework::Scope &scope, - const platform::Place &place) const override { - // get input - auto *var = scope.FindVar(Input(kInput)); - PADDLE_ENFORCE_NOT_NULL(var); - auto &tensor = var->Get(); - // get output - auto *out = scope.FindVar(Output(kOutput)); - PADDLE_ENFORCE_NOT_NULL(out); - auto *out_tensor = out->GetMutable(); + protected: + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of IsEmptyOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of IsEmptyOp should not be null."); + ctx->SetOutputDim("Out", {1}); + } - out_tensor->Resize({1}); - out_tensor->mutable_data(platform::CPUPlace())[0] = - framework::product(tensor.dims()) == 0; + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext &ctx) const override { + framework::OpKernelType kt = framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + platform::CPUPlace()); + return kt; } }; -class IsEmptyOpProtoMaker : public framework::OpProtoAndCheckerMaker { +class IsEmptyOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput(kInput, "(Tensor) Tensor which is to be checked."); - AddOutput(kOutput, "(Tensor) a boolean Tensor that indicate empty or not."); + AddInput("X", "(LoDTensor) Tensor which is to be checked."); + AddOutput("Out", + "(LoDTensor) a boolean Tensor that indicate empty or not."); AddComment(R"DOC( IsEmpty Operator which checks whether a tensor is empty. @@ -62,5 +58,12 @@ It will just return product(tensor.ddims()) > 0; } // namespace operators } // namespace paddle -REGISTER_OP_WITHOUT_GRADIENT(is_empty, paddle::operators::IsEmptyOp, - paddle::operators::IsEmptyOpProtoMaker); +namespace ops = paddle::operators; + +REGISTER_OPERATOR(is_empty, ops::IsEmptyOp, ops::IsEmptyOpMaker, + paddle::framework::EmptyGradOpMaker); +REGISTER_OP_CPU_KERNEL( + is_empty, ops::IsEmptyOpKernel, + ops::IsEmptyOpKernel, + ops::IsEmptyOpKernel, + ops::IsEmptyOpKernel); diff --git a/paddle/fluid/operators/is_empty_op.h b/paddle/fluid/operators/is_empty_op.h new file mode 100644 index 0000000000000000000000000000000000000000..3e3af22fa8d842b6a1e67418446f1a40949e046b --- /dev/null +++ b/paddle/fluid/operators/is_empty_op.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" + +namespace paddle { +namespace operators { + +template +class IsEmptyOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + // get input + auto* input_tensor = context.Input("X"); + // get output + auto* output_tensor = context.Output("Out"); + + output_tensor->mutable_data(platform::CPUPlace())[0] = + framework::product(input_tensor->dims()) == 0; + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc index abc88d3eb1514e159f4a880f44ecc0c0960a73d9..57eb5d9a0e73a51d9e2cef7ad7539c1b9da2c4ea 100644 --- a/paddle/fluid/operators/listen_and_serv_op.cc +++ b/paddle/fluid/operators/listen_and_serv_op.cc @@ -57,8 +57,7 @@ static void ParallelExecuteBlocks( framework::Async([&executor, &prepared, &program, &scope, idx]() { int run_block = idx; // thread local try { - executor->RunPreparedContext(prepared[run_block].get(), scope, - false, false); + executor->RunPreparedContext(prepared[run_block].get(), scope); } catch (std::exception &e) { LOG(ERROR) << "run sub program error " << e.what(); } @@ -211,8 +210,8 @@ static void AsyncUpdateThread( } auto fs = framework::Async([var_name, &executor, &v, prepared] { try { - executor->RunPreparedContext(prepared, v.second->GetMutableLocalScope(), - false, false); + executor->RunPreparedContext(prepared, + v.second->GetMutableLocalScope()); } catch (std::exception &e) { LOG(ERROR) << "run sub program error " << e.what(); } diff --git a/paddle/fluid/operators/pool_mkldnn_op.cc b/paddle/fluid/operators/pool_mkldnn_op.cc index 63eaaedcd5fc3df17902511dc02b25bf43ccd241..60e936298defe7c6ce8a33bdc7de05b52eb950e7 100644 --- a/paddle/fluid/operators/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/pool_mkldnn_op.cc @@ -18,6 +18,26 @@ limitations under the License. 
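The new `is_empty_op.h` kernel above reduces the emptiness test to checking whether the product of the input's dimensions is zero. A standalone sketch of that check, with a hand-rolled `product` standing in for `framework::product`:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Stand-in for framework::product(dims): a tensor is "empty" exactly when
// the product of its dimensions is 0, i.e. it holds no elements.
int64_t product(const std::vector<int64_t>& dims) {
  int64_t p = 1;
  for (int64_t d : dims) p *= d;
  return p;
}

bool IsEmpty(const std::vector<int64_t>& dims) { return product(dims) == 0; }

int main() {
  std::cout << IsEmpty({2, 3}) << "\n";    // 0: six elements
  std::cout << IsEmpty({0, 128}) << "\n";  // 1: zero rows, so empty
}
```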
*/ namespace paddle { namespace operators { +using mkldnn::memory; // Note: paddle has also "memory" namespace +using mkldnn::pooling_forward; +using mkldnn::pooling_backward; + +// Generate keys for storing/retriving primitives for this operator +// TODO(jczaja): Make hashing function more optimial +static std::string gethash(memory::dims& input_dims, std::string& pooling_type, + std::vector& ksize, std::vector& strides, + std::vector& paddings, std::string suffix) { + auto dims2str = [](memory::dims& operand_dims) { + std::string dstr = ""; + for (size_t i = 0; i < operand_dims.size(); ++i) { + dstr += std::to_string(operand_dims[i]) + "-"; + } + return dstr; + }; + return dims2str(input_dims) + dims2str(ksize) + dims2str(strides) + + dims2str(paddings) + pooling_type + suffix; +} + template class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { public: @@ -34,10 +54,6 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { // Get an unique name from "argument" name of "Out" variable // This name will be used as key when saving info into device context - const std::string key = ctx.op().Output("Out"); - const std::string key_pool_pd = key + "@pool_pd"; - const std::string key_pool_workspace_memory = - key + "@pool_workspace_memory"; std::string pooling_type = ctx.Attr("pooling_type"); std::vector ksize = ctx.Attr>("ksize"); @@ -63,37 +79,71 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { std::vector src_tz = paddle::framework::vectorize2int(input->dims()); std::vector dst_tz = paddle::framework::vectorize2int(output->dims()); - // TODO(pzelazko-intel): support more formats - auto src_md = platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32, - mkldnn::memory::format::nchw); - auto dst_md = platform::MKLDNNMemDesc(dst_tz, mkldnn::memory::f32, - mkldnn::memory::format::nchw); - - std::shared_ptr pool_pd = - CreatePrimitiveDesc(src_md, dst_md, strides, paddings, ksize, - pooling_type, mkldnn_engine); - - // save pool_pd into global device context to be referred in backward path - dev_ctx.SetBlob(key_pool_pd, pool_pd); - - std::shared_ptr workspace_memory = - CreateWorkspaceMemory(pool_pd, pooling_type, mkldnn_engine); - - // save pool_workspace_memory to be referred in backward path - dev_ctx.SetBlob(key_pool_workspace_memory, workspace_memory); - - auto src_memory = - mkldnn::memory({src_md, mkldnn_engine}, - static_cast(const_cast(input_data))); - auto dst_memory = - mkldnn::memory({dst_md, mkldnn_engine}, - static_cast(const_cast(output_data))); + const std::string key = gethash(src_tz, pooling_type, ksize, strides, + paddings, ctx.op().Output("Out")); + const std::string key_pool_p = key + "@pool_p"; + const std::string key_pool_pd = key + "@pool_pd"; + const std::string key_pool_src_mem_p = key + "@pool_src_mem_p"; + const std::string key_pool_dst_mem_p = key + "@pool_dst_mem_p"; + const std::string key_pool_workspace_memory = + key + "@pool_workspace_memory"; - auto pool_prim = mkldnn::pooling_forward(*pool_pd, src_memory, dst_memory, - *workspace_memory); + auto pool_p = + std::static_pointer_cast(dev_ctx.GetBlob(key_pool_p)); + if (pool_p == nullptr) { + // TODO(pzelazko-intel): support more formats + + auto src_md = + platform::MKLDNNMemDesc(src_tz, platform::MKLDNNGetDataType(), + mkldnn::memory::format::nchw); + auto dst_md = + platform::MKLDNNMemDesc(dst_tz, platform::MKLDNNGetDataType(), + mkldnn::memory::format::nchw); + + std::shared_ptr pool_pd = + CreatePrimitiveDesc(src_md, dst_md, strides, paddings, ksize, + pooling_type, 
mkldnn_engine); + + // save pool_pd into global device context to be referred in backward path + dev_ctx.SetBlob(key_pool_pd, pool_pd); + + std::shared_ptr workspace_memory = + CreateWorkspaceMemory(pool_pd, pooling_type, mkldnn_engine); + + // save pool_workspace_memory to be referred in backward path + dev_ctx.SetBlob(key_pool_workspace_memory, workspace_memory); + + auto pool_src_memory_p = std::make_shared( + memory::primitive_desc{src_md, mkldnn_engine}, + static_cast(const_cast(input_data))); + dev_ctx.SetBlob(key_pool_src_mem_p, pool_src_memory_p); + + auto pool_dst_memory_p = std::make_shared( + memory::primitive_desc{dst_md, mkldnn_engine}, + static_cast(output_data)); + dev_ctx.SetBlob(key_pool_dst_mem_p, pool_dst_memory_p); + + pool_p = std::make_shared( + *pool_pd, *(pool_src_memory_p.get()), *(pool_dst_memory_p.get()), + *workspace_memory); + dev_ctx.SetBlob(key_pool_p, pool_p); + } else { + // Primitives already exist + auto pool_src_memory_p = + std::static_pointer_cast(dev_ctx.GetBlob(key_pool_src_mem_p)); + PADDLE_ENFORCE(pool_src_memory_p != nullptr, + "Fail to find pooling src mem_p in device context"); + auto pool_dst_memory_p = + std::static_pointer_cast(dev_ctx.GetBlob(key_pool_dst_mem_p)); + PADDLE_ENFORCE(pool_dst_memory_p != nullptr, + "Fail to find pooling dst mem_p in device context"); + pool_src_memory_p->set_data_handle( + reinterpret_cast(const_cast(input_data))); + pool_dst_memory_p->set_data_handle(output_data); + } // push primitive to stream and wait until it's executed - std::vector pipeline{pool_prim}; + std::vector pipeline{*(pool_p.get())}; mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait(); } @@ -120,9 +170,10 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { mkldnn::memory::primitive_desc workspace_md = pooling_type == "max" ? 
pool_pd->workspace_primitive_desc() - : mkldnn::memory::primitive_desc( - {{}, mkldnn::memory::f32, mkldnn::memory::format::nchw}, - engine); + : mkldnn::memory::primitive_desc({{}, + platform::MKLDNNGetDataType(), + mkldnn::memory::format::nchw}, + engine); auto p_workspace_memory = new mkldnn::memory(workspace_md); return std::unique_ptr(p_workspace_memory); @@ -140,13 +191,6 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel { const Tensor* out_grad = ctx.Input(framework::GradVarName("Out")); Tensor* in_x_grad = ctx.Output(framework::GradVarName("X")); - // Get an unique name from "argument" name of "Out" variable - // This name will be used as key when referring info from device context - const std::string key = ctx.op().Input("Out"); - const std::string key_pool_pd = key + "@pool_pd"; - const std::string key_pool_workspace_memory = - key + "@pool_workspace_memory"; - std::string pooling_type = ctx.Attr("pooling_type"); std::vector ksize = ctx.Attr>("ksize"); std::vector strides = ctx.Attr>("strides"); @@ -171,43 +215,76 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel { std::vector diff_dst_tz = paddle::framework::vectorize2int(out_grad->dims()); - auto diff_src_md = platform::MKLDNNMemDesc(diff_src_tz, mkldnn::memory::f32, - mkldnn::memory::format::nchw); - auto diff_dst_md = platform::MKLDNNMemDesc(diff_dst_tz, mkldnn::memory::f32, - mkldnn::memory::format::nchw); - - // Retrieve pool_pd/pool_workspace_memory from device context - auto pool_pd = - std::static_pointer_cast( - dev_ctx.GetBlob(key_pool_pd)); - PADDLE_ENFORCE(pool_pd != nullptr, - "Fail to find pool_pd in device context"); - - auto workspace_memory = std::static_pointer_cast( - dev_ctx.GetBlob(key_pool_workspace_memory)); - PADDLE_ENFORCE(workspace_memory != nullptr, - "Fail to find workspace_memory in device context"); - - auto pool_bwd_desc = mkldnn::pooling_backward::desc( - pooling_type == "max" ? 
mkldnn::algorithm::pooling_max - : mkldnn::algorithm::pooling_avg, - diff_src_md, diff_dst_md, strides, ksize, paddings, paddings, - mkldnn::padding_kind::zero); - auto pool_bwd_pd = mkldnn::pooling_backward::primitive_desc( - pool_bwd_desc, mkldnn_engine, *pool_pd); - - auto diff_src_memory = - mkldnn::memory({diff_src_md, mkldnn_engine}, - static_cast(const_cast(in_x_grad_data))); - auto diff_dst_memory = - mkldnn::memory({diff_dst_md, mkldnn_engine}, - static_cast(const_cast(out_grad_data))); + // Get an unique name from "argument" name of "Out" variable + // This name will be used as key when referring info from device context + const std::string key = gethash(diff_src_tz, pooling_type, ksize, strides, + paddings, ctx.op().Input("Out")); + const std::string key_pool_bwd_p = key + "@pool_bwd_p"; + const std::string key_pool_diff_src_mem_p = key + "@pool_diff_src_mem_p"; + const std::string key_pool_diff_dst_mem_p = key + "@pool_diff_dst_mem_p"; + const std::string key_pool_pd = key + "@pool_pd"; + const std::string key_pool_workspace_memory = + key + "@pool_workspace_memory"; - auto bwd_prim = mkldnn::pooling_backward( - pool_bwd_pd, diff_dst_memory, *workspace_memory, diff_src_memory); + auto pool_bwd_p = std::static_pointer_cast( + dev_ctx.GetBlob(key_pool_bwd_p)); + if (pool_bwd_p == nullptr) { + auto diff_src_md = + platform::MKLDNNMemDesc(diff_src_tz, platform::MKLDNNGetDataType(), + mkldnn::memory::format::nchw); + auto diff_dst_md = + platform::MKLDNNMemDesc(diff_dst_tz, platform::MKLDNNGetDataType(), + mkldnn::memory::format::nchw); + // Retrieve pool_pd/pool_workspace_memory from device context + auto pool_pd = + std::static_pointer_cast( + dev_ctx.GetBlob(key_pool_pd)); + PADDLE_ENFORCE(pool_pd != nullptr, + "Fail to find pool_pd in device context"); + + auto workspace_memory = std::static_pointer_cast( + dev_ctx.GetBlob(key_pool_workspace_memory)); + PADDLE_ENFORCE(workspace_memory != nullptr, + "Fail to find workspace_memory in device context"); + + auto pool_diff_src_memory_p = std::make_shared(memory( + {diff_src_md, mkldnn_engine}, static_cast(in_x_grad_data))); + dev_ctx.SetBlob(key_pool_diff_src_mem_p, pool_diff_src_memory_p); + + auto pool_diff_dst_memory_p = std::make_shared( + memory({diff_dst_md, mkldnn_engine}, + static_cast(const_cast(out_grad_data)))); + dev_ctx.SetBlob(key_pool_diff_dst_mem_p, pool_diff_dst_memory_p); + + auto pool_bwd_desc = mkldnn::pooling_backward::desc( + pooling_type == "max" ? 
mkldnn::algorithm::pooling_max + : mkldnn::algorithm::pooling_avg, + diff_src_md, diff_dst_md, strides, ksize, paddings, paddings, + mkldnn::padding_kind::zero); + auto pool_bwd_pd = mkldnn::pooling_backward::primitive_desc( + pool_bwd_desc, mkldnn_engine, *pool_pd); + + pool_bwd_p = std::make_shared( + pool_bwd_pd, *(pool_diff_dst_memory_p.get()), *workspace_memory, + *(pool_diff_src_memory_p)); + dev_ctx.SetBlob(key_pool_bwd_p, pool_bwd_p); + } else { + // Primitives already exist + auto pool_diff_src_memory_p = std::static_pointer_cast( + dev_ctx.GetBlob(key_pool_diff_src_mem_p)); + PADDLE_ENFORCE(pool_diff_src_memory_p != nullptr, + "Fail to find pooling src mem_p in device context"); + auto pool_diff_dst_memory_p = std::static_pointer_cast( + dev_ctx.GetBlob(key_pool_diff_dst_mem_p)); + PADDLE_ENFORCE(pool_diff_dst_memory_p != nullptr, + "Fail to find pooling dst mem_p in device context"); + pool_diff_src_memory_p->set_data_handle( + reinterpret_cast(in_x_grad_data)); + pool_diff_dst_memory_p->set_data_handle(const_cast(out_grad_data)); + } // push primitive to stream and wait until it's executed - std::vector pipeline{bwd_prim}; + std::vector pipeline{*(pool_bwd_p.get())}; mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait(); } // Compute() }; diff --git a/paddle/fluid/operators/roi_pool_op.cu b/paddle/fluid/operators/roi_pool_op.cu index f905d690f984a20622c5fbcbcc813d888dfb19d9..50450b62f7b1c0b2b5abf01a43581a0e2d2cd01e 100644 --- a/paddle/fluid/operators/roi_pool_op.cu +++ b/paddle/fluid/operators/roi_pool_op.cu @@ -38,10 +38,10 @@ __global__ void GPUROIPoolForward( int index = blockIdx.x * blockDim.x + threadIdx.x; int offset = blockDim.x * gridDim.x; for (size_t i = index; i < nthreads; i += offset) { - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; + int pw = i % pooled_width; + int ph = (i / pooled_width) % pooled_height; + int c = (i / pooled_width / pooled_height) % channels; + int n = i / pooled_width / pooled_height / channels; const int64_t* offset_input_rois = input_rois + n * kROISize; int roi_batch_ind = roi_batch_id_data[n]; @@ -52,14 +52,19 @@ __global__ void GPUROIPoolForward( int roi_width = max(roi_end_w - roi_start_w + 1, 1); int roi_height = max(roi_end_h - roi_start_h + 1, 1); - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - int hstart = static_cast(floor(static_cast(ph) * bin_size_h)); - int wstart = static_cast(floor(static_cast(pw) * bin_size_w)); - int hend = static_cast(ceil(static_cast(ph + 1) * bin_size_h)); - int wend = static_cast(ceil(static_cast(pw + 1) * bin_size_w)); + int hstart = static_cast(floor(static_cast(ph) * + static_cast(roi_height) / + static_cast(pooled_height))); + int wstart = static_cast(floor(static_cast(pw) * + static_cast(roi_width) / + static_cast(pooled_width))); + int hend = static_cast(ceil(static_cast(ph + 1) * + static_cast(roi_height) / + static_cast(pooled_height))); + int wend = static_cast(ceil(static_cast(pw + 1) * + static_cast(roi_width) / + static_cast(pooled_width))); hstart = min(max(hstart + roi_start_h, 0), height); hend = min(max(hend + roi_start_h, 0), height); wstart = min(max(wstart + roi_start_w, 0), width); @@ -79,9 +84,9 @@ __global__ void GPUROIPoolForward( } } } - output_data[index] = maxval; + output_data[i] = maxval; if (argmax_data) { - 
argmax_data[index] = maxidx; + argmax_data[i] = maxidx; } } } @@ -96,10 +101,10 @@ __global__ void GPUROIPoolBackward( int index = blockIdx.x * blockDim.x + threadIdx.x; int offset = blockDim.x * gridDim.x; for (int i = index; i < nthreads; i += offset) { - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; + int pw = i % pooled_width; + int ph = (i / pooled_width) % pooled_height; + int c = (i / pooled_width / pooled_height) % channels; + int n = i / pooled_width / pooled_height / channels; int roi_batch_ind = roi_batch_id_data[n]; int input_offset = (roi_batch_ind * channels + c) * height * width; @@ -138,6 +143,7 @@ class GPUROIPoolOpKernel : public framework::OpKernel { int width = in_dims[3]; int rois_num = rois->dims()[0]; + if (rois_num == 0) return; int output_size = out->numel(); diff --git a/paddle/fluid/operators/send_recv_op_test.cc b/paddle/fluid/operators/send_recv_op_test.cc index eb51f301bfe2a97c65dd1fec23ff5a44f3843b05..d5303eaf50722234d205264e56892b1723104d53 100644 --- a/paddle/fluid/operators/send_recv_op_test.cc +++ b/paddle/fluid/operators/send_recv_op_test.cc @@ -92,12 +92,16 @@ void InitSelectedRowsInScope(const p::CPUPlace &place, f::Scope *scope) { void AddOp(const std::string &type, const f::VariableNameMap &inputs, const f::VariableNameMap &outputs, f::AttributeMap attrs, - f::BlockDesc *block) { + f::BlockDesc *block, bool is_sparse) { // insert output for (auto kv : outputs) { for (auto v : kv.second) { auto var = block->Var(v); var->SetDataType(f::proto::VarType::FP32); + var->SetPersistable(true); + if (is_sparse) { + var->SetType(f::proto::VarType::SELECTED_ROWS); + } } } @@ -128,7 +132,8 @@ void StartServerNet(bool is_sparse, std::atomic *initialized) { auto *optimize_block = program.AppendBlock(root_block); auto *prefetch_block = program.AppendBlock(root_block); // X for server side tensors, RX for received tensors, must be of same shape. 
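The `roi_pool_op.cu` fix above switches the kernel bodies from `index` to the grid-stride loop variable `i`; with `index`, each thread kept rewriting the element at its starting position instead of covering its whole stride. A host-side C++ sketch of the grid-stride pattern showing why everything in the body must be derived from `i`:

```cpp
#include <cstdio>
#include <vector>

// Host-side emulation of a grid-stride loop: each "thread" starts at its
// global index and strides by the total thread count. The body must index
// with the loop variable i; indexing with the starting index would write
// only one element per thread and leave the rest of the output untouched.
int main() {
  const int num_threads = 8;  // total "threads" in the grid
  const int n = 20;           // elements to process
  std::vector<int> out(n, -1);
  for (int index = 0; index < num_threads; ++index) {
    for (int i = index; i < n; i += num_threads) {  // grid-stride loop
      out[i] = i * i;  // correct: everything derived from i
    }
  }
  for (int v : out) std::printf("%d ", v);  // no -1 left behind
  std::printf("\n");
}
```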
- AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, optimize_block); + AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, optimize_block, + is_sparse); f::AttributeMap attrs; attrs.insert({"endpoint", std::string("127.0.0.1:0")}); attrs.insert({"Fanin", 1}); diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 79e3c26fef51b4d27520a8079de1074d72f89617..b29035bafd34fa81dc6b59691142fe74439202b8 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -49,7 +49,7 @@ nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_ nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context) -cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto ${GPU_CTX_DEPS}) +cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS}) cc_library(profiler SRCS profiler.cc DEPS device_context device_tracer) cc_test(profiler_test SRCS profiler_test.cc DEPS profiler) diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index 23f1d615daab91f0e4b353bc7d9a3ca7f5cec5ae..56ed5912a15437b72b769610912c7493d77e5964 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -71,5 +71,15 @@ inline bool CanMKLDNNBeUsed(const framework::ExecutionContext& ctx) { return use_mkldnn && platform::is_cpu_place(ctx.GetPlace()); } +template +mkldnn::memory::data_type MKLDNNGetDataType() { + return mkldnn::memory::data_undef; +} + +template <> +inline mkldnn::memory::data_type MKLDNNGetDataType() { + return mkldnn::memory::f32; +} + } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index 50bc0aba6aa0f056dc0b2d49f6b3b745433e0756..2fb5c6dc6b8ad25fa1ad5fcf7c2acfedd5be4a83 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -173,8 +173,9 @@ void PopEvent(const std::string& name, const DeviceContext* dev_ctx) { } RecordEvent::RecordEvent(const std::string& name, const DeviceContext* dev_ctx) - : start_ns_(PosixInNsec()) { + : is_enabled_(false), start_ns_(PosixInNsec()) { if (g_state == ProfilerState::kDisabled) return; + is_enabled_ = true; dev_ctx_ = dev_ctx; name_ = name; PushEvent(name_, dev_ctx_); @@ -183,7 +184,7 @@ RecordEvent::RecordEvent(const std::string& name, const DeviceContext* dev_ctx) } RecordEvent::~RecordEvent() { - if (g_state == ProfilerState::kDisabled) return; + if (g_state == ProfilerState::kDisabled || !is_enabled_) return; DeviceTracer* tracer = GetDeviceTracer(); if (tracer) { tracer->AddCPURecords(CurAnnotation(), start_ns_, PosixInNsec(), @@ -193,14 +194,16 @@ RecordEvent::~RecordEvent() { PopEvent(name_, dev_ctx_); } -RecordBlock::RecordBlock(int block_id) : start_ns_(PosixInNsec()) { +RecordBlock::RecordBlock(int block_id) + : is_enabled_(false), start_ns_(PosixInNsec()) { if (g_state == ProfilerState::kDisabled) return; + is_enabled_ = true; SetCurBlock(block_id); name_ = string::Sprintf("block_%d", block_id); } RecordBlock::~RecordBlock() { - if (g_state == ProfilerState::kDisabled) return; + if (g_state == ProfilerState::kDisabled || !is_enabled_) return; DeviceTracer* tracer = GetDeviceTracer(); if (tracer) { // We try to put all blocks at the same nested depth in the diff --git a/paddle/fluid/platform/profiler.h b/paddle/fluid/platform/profiler.h 
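The `profiler.cc` change above records in `is_enabled_` whether profiling was actually on when a `RecordEvent`/`RecordBlock` was constructed, so the destructor never pops an event it did not push if the profiler state flips in between. A small RAII sketch of that guard (generic code, not Paddle's real profiler API):

```cpp
#include <iostream>
#include <string>

// Toy global profiler state, standing in for g_state in profiler.cc.
static bool g_profiling_enabled = false;

void PushEvent(const std::string& name) { std::cout << "push " << name << "\n"; }
void PopEvent(const std::string& name) { std::cout << "pop " << name << "\n"; }

class ScopedEvent {
 public:
  explicit ScopedEvent(const std::string& name)
      : is_enabled_(false), name_(name) {
    if (!g_profiling_enabled) return;  // profiler off: record nothing
    is_enabled_ = true;                // remember that we really pushed
    PushEvent(name_);
  }
  ~ScopedEvent() {
    // Pop only if the profiler is still on and this object actually pushed.
    if (!g_profiling_enabled || !is_enabled_) return;
    PopEvent(name_);
  }

 private:
  bool is_enabled_;
  std::string name_;
};

int main() {
  {
    ScopedEvent e("before_enable");  // no push: profiler still disabled
    g_profiling_enabled = true;
  }                                  // no pop either, thanks to is_enabled_
  ScopedEvent e2("after_enable");    // pushed and popped normally
}
```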
index 61b98143e41abb9e47d2c717c7876f1bab7f5077..643bb6183d144ec11a4890d9ea1ca970acb08b4c 100644 --- a/paddle/fluid/platform/profiler.h +++ b/paddle/fluid/platform/profiler.h @@ -74,6 +74,7 @@ struct RecordEvent { ~RecordEvent(); + bool is_enabled_; uint64_t start_ns_; // The device context is used by Event to get the current cuda stream. const DeviceContext* dev_ctx_; @@ -89,6 +90,7 @@ struct RecordBlock { ~RecordBlock(); private: + bool is_enabled_; std::string name_; uint64_t start_ns_; }; diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc index 6471eb3ab7bf05365c0bb2bf68bb74ef9044c527..bcf6d4dd3087060c016e53722cde80704ef2e834 100644 --- a/paddle/fluid/pybind/protobuf.cc +++ b/paddle/fluid/pybind/protobuf.cc @@ -238,6 +238,7 @@ void BindVarDsec(pybind11::module *m) { pybind11::enum_(var_desc, "VarType", "") .value("BOOL", pd::proto::VarType::BOOL) + .value("UINT8", pd::proto::VarType::UINT8) .value("INT16", pd::proto::VarType::INT16) .value("INT32", pd::proto::VarType::INT32) .value("INT64", pd::proto::VarType::INT64) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 7e00bd38487902227c3b4521db20cdbe314059be..92b8b90880bc78dbc281a959a7472c2822f76fc3 100755 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -198,7 +198,7 @@ EOF # run paddle version to install python packages first RUN apt-get update &&\ ${NCCL_DEPS}\ - apt-get install -y wget python-pip dmidecode python-tk && pip install -U pip==9.0.3 && \ + apt-get install -y wget python-pip dmidecode python-tk && easy_install -U pip && \ pip install /*.whl; apt-get install -f -y && \ apt-get clean -y && \ rm -f /*.whl && \ diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 58a30ab3e522c5b14ac8c72d4b03667de6720425..fbe219a1c9cf85f19ae2ab991ae7e4207858f204 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -95,7 +95,6 @@ function cmake_gen() { -DWITH_AVX=${WITH_AVX:-OFF} -DWITH_GOLANG=${WITH_GOLANG:-OFF} -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} - -DWITH_SWIG_PY=ON -DWITH_C_API=${WITH_C_API:-OFF} -DWITH_PYTHON=${WITH_PYTHON:-ON} -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} @@ -406,17 +405,19 @@ EOF function gen_dockerfile() { # Set BASE_IMAGE according to env variables + CUDA_MAJOR="$(echo $CUDA_VERSION | cut -d '.' -f 1).$(echo $CUDA_VERSION | cut -d '.' -f 2)" + CUDNN_MAJOR=$(echo $CUDNN_VERSION | cut -d '.' -f 1) if [[ ${WITH_GPU} == "ON" ]]; then - BASE_IMAGE="nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04" + BASE_IMAGE="nvidia/cuda:${CUDA_MAJOR}-cudnn${CUDNN_MAJOR}-runtime-ubuntu16.04" else - BASE_IMAGE="ubuntu:16.04" + BASE_IMAGE="ubuntu:16.04" fi DOCKERFILE_GPU_ENV="" DOCKERFILE_CUDNN_DSO="" if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:\${LD_LIBRARY_PATH}" - DOCKERFILE_CUDNN_DSO="RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.5 /usr/lib/x86_64-linux-gnu/libcudnn.so" + DOCKERFILE_CUDNN_DSO="RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.${CUDNN_MAJOR} /usr/lib/x86_64-linux-gnu/libcudnn.so" fi cat < -#include -#include -#include -#include -#include - -#include "mkldnn.h" -#endif - -namespace mkldnn { - -/// @addtogroup cpp_api C++ API -/// @{ - -/// @addtogroup cpp_api_utils Utils -/// @{ - -/// A class that provides the destructor for an Intel(R) MKL-DNN C handle -template -class handle_traits {}; - -/// A class for wrapping an Intel(R) MKL-DNN handle. 
It is used as the base -/// class for primitive (#mkldnn_primitive_t), engine (#mkldnn_engine_t), and -/// stream (#mkldnn_stream_t) handles. An object of the #mkldnn::handle class -/// can be passed by value. This class enables wrapping: -/// - Newly constructed handles. -/// @n In this case, the constructed handle uses reference counting provided -/// by @p std::shared_ptr with a proper deleter function specified through -/// the @p handle_traits class. -/// - Pre-existing handles returned by the Intel(R) MKL-DNN C API (for -/// example, through #mkldnn_primitive_get_output()). -/// @n In this case, an Intel(R) MKL-DNN C API handle is wrapped without a -/// deleter because it is assumed that the handle wrapper for the original -/// object deletes the handle (this model is similar to @p std::weak_ptr). -template > -class handle { -private: - std::shared_ptr::type> _data; - handle(const handle &&) = delete; - handle &operator=(const handle &&other) = delete; - -protected: - /// Constructs a C handle wrapper. - /// @param t The C handle to wrap. - /// @param weak A flag to specify whether to construct a weak wrapper. - handle(T t = 0, bool weak = false) : _data(0) { reset(t, weak); } - - bool operator==(const T other) const { return other == _data.get(); } - bool operator!=(const T other) const { return !(*this == other); } - -public: - handle(const handle &other) : _data(other._data) {} - handle &operator=(const handle &other) { - _data = other._data; - return *this; - } - /// Resets the value of a C handle. - /// @param t The new value of the C handle. - /// @param weak A flag to specify whether the wrapper should be weak. - void reset(T t, bool weak = false) { - auto dummy_destructor = [](T) { - return decltype(traits::destructor(0))(0); - }; - _data.reset(t, weak ? dummy_destructor : traits::destructor); - } - - /// Returns the value of the underlying C handle. - T get() const { return _data.get(); } - - bool operator==(const handle &other) const { - return other._data.get() == _data.get(); - } - bool operator!=(const handle &other) const { return !(*this == other); } -}; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -template <> -struct handle_traits { - static constexpr auto destructor = &mkldnn_primitive_desc_destroy; -}; - -template <> -struct handle_traits { - static constexpr auto destructor = &mkldnn_primitive_destroy; -}; -#endif - -/// Base class for all computational primitives. -class primitive : public handle { - friend struct error; - friend struct stream; - friend class primitive_at; - using handle::handle; - -public: - /// A proxy to C primitive kind enum - enum class kind { - undefined_primitive = mkldnn_undefined_primitive, - memory = mkldnn_memory, - view = mkldnn_view, - reorder = mkldnn_reorder, - concat = mkldnn_concat, - concat_inplace = mkldnn_concat_inplace, - sum = mkldnn_sum, - convolution = mkldnn_convolution, - deconvolution = mkldnn_deconvolution, - eltwise = mkldnn_eltwise, - relu = mkldnn_relu, - softmax = mkldnn_softmax, - pooling = mkldnn_pooling, - lrn = mkldnn_lrn, - batch_normalization = mkldnn_batch_normalization, - inner_product = mkldnn_inner_product, - convolution_relu = mkldnn_convolution_relu, - rnn = mkldnn_rnn, - }; - - /// A wrapper structure to specify a particular output of a primitive. - struct at { - /// The underlying C API structure. - mkldnn_primitive_at_t data; - /// Constructs a wrapper specifying @p aprimitive output with index @p - /// at. - /// - /// @param aprimitive The target primitive. - /// @param at The output index. 
- - at(const primitive &aprimitive, size_t at = 0) - : data(mkldnn_primitive_at(aprimitive.get(), at)) {} - /// Returns the specified output. - inline operator primitive() const; - }; - - /// Returns the descriptor of the underlying C API primitive - inline const_mkldnn_primitive_desc_t get_primitive_desc() const; - // TODO: use the C++ API wrapper structure. -}; - -inline mkldnn_primitive_kind_t convert_to_c(primitive::kind akind) { - return static_cast(akind); -} - -/// Intel(R) MKL-DNN exception class. -/// -/// This class captures the status returned by the failed C API function, error -/// message, and, optionally, handle of the primitive that caused the error. -struct error : public std::exception { - mkldnn_status_t status; - std::string message; - primitive error_primitive; - - /// Constructs an error instance. - /// - /// @param astatus The error status returned by the C API. - /// @param amessage The error message. - /// @param aerror_primitive (optional) A C handle of the primitive that - /// caused the error. - - error(mkldnn_status_t astatus, - std::string amessage, - mkldnn_primitive_t aerror_primitive = 0) - : status(astatus), - message(amessage), - error_primitive(aerror_primitive, true) {} - - /// A convenience function for wrapping calls to the C API. Checks the - /// return status and throws an #error in case of failure. - /// - /// @param status The error status returned by the C API. - /// @param message The error message. - /// @param error_primitive (optional) A C handle of the primitive that - /// caused the error. - - static void wrap_c_api(mkldnn_status_t status, - std::string message, - mkldnn_primitive_t *error_primitive = 0) { - if (status != mkldnn_success) { - if (nullptr != error_primitive) - throw error(status, message, *error_primitive); - else - throw error(status, message, nullptr); - } - } -}; - -inline primitive::at::operator primitive() const { - const_mkldnn_primitive_t output; - error::wrap_c_api( - mkldnn_primitive_get_output(data.primitive, data.output_index, &output), - "could not get an output primitive"); - return primitive(const_cast(output), true); -} - -const_mkldnn_primitive_desc_t primitive::get_primitive_desc() const { - const_mkldnn_primitive_desc_t pd; - error::wrap_c_api(mkldnn_primitive_get_primitive_desc(get(), &pd), - "could not get primitive descriptor by primitive"); - return pd; -} -/// @} - -/// @addtogroup cpp_api_enums Common data types and enumerations -/// @{ - -enum round_mode { - round_nearest = mkldnn_round_nearest, - round_down = mkldnn_round_down, -}; - -inline mkldnn_round_mode_t convert_to_c(round_mode mode) { - return static_cast(mode); -} - -enum padding_kind { zero = mkldnn_padding_zero }; - -inline mkldnn_padding_kind_t convert_to_c(padding_kind kind) { - return static_cast(kind); -} - -enum prop_kind { - forward_training = mkldnn_forward_training, - forward_scoring = mkldnn_forward_scoring, - forward_inference = mkldnn_forward_inference, - forward = mkldnn_forward, - backward = mkldnn_backward, - backward_data = mkldnn_backward_data, - backward_weights = mkldnn_backward_weights, - backward_bias = mkldnn_backward_bias -}; - -inline mkldnn_prop_kind_t convert_to_c(prop_kind kind) { - return static_cast(kind); -} - -enum algorithm { - algorithm_undef = mkldnn_alg_kind_undef, - convolution_direct = mkldnn_convolution_direct, - convolution_winograd = mkldnn_convolution_winograd, - deconvolution_direct = mkldnn_deconvolution_direct, - deconvolution_winograd = mkldnn_deconvolution_winograd, - eltwise_relu = 
mkldnn_eltwise_relu, - eltwise_tanh = mkldnn_eltwise_tanh, - eltwise_elu = mkldnn_eltwise_elu, - eltwise_square = mkldnn_eltwise_square, - eltwise_abs = mkldnn_eltwise_abs, - eltwise_sqrt = mkldnn_eltwise_sqrt, - eltwise_linear = mkldnn_eltwise_linear, - eltwise_bounded_relu = mkldnn_eltwise_bounded_relu, - eltwise_soft_relu = mkldnn_eltwise_soft_relu, - eltwise_logistic = mkldnn_eltwise_logistic, - lrn_across_channels = mkldnn_lrn_across_channels, - lrn_within_channel = mkldnn_lrn_within_channel, - pooling_max = mkldnn_pooling_max, - pooling_avg = mkldnn_pooling_avg, - pooling_avg_include_padding = mkldnn_pooling_avg_include_padding, - pooling_avg_exclude_padding = mkldnn_pooling_avg_exclude_padding, - vanilla_rnn = mkldnn_vanilla_rnn, - vanilla_lstm = mkldnn_vanilla_lstm, - vanilla_gru = mkldnn_vanilla_gru, -}; - -inline mkldnn_alg_kind_t convert_to_c(algorithm aalgorithm) { - return static_cast(aalgorithm); -} - -enum batch_normalization_flag { - use_global_stats = mkldnn_use_global_stats, - use_scale_shift = mkldnn_use_scaleshift, - omit_stats = mkldnn_omit_stats, - fuse_bn_relu = mkldnn_fuse_bn_relu -}; - -inline mkldnn_batch_normalization_flag_t convert_to_c( - batch_normalization_flag aflag) { - return static_cast(aflag); -} - -enum rnn_direction { - unidirectional_left2right = mkldnn_unidirectional_left2right, - unidirectional_right2left = mkldnn_unidirectional_right2left, - unidirectional = mkldnn_unidirectional, - bidirectional_concat = mkldnn_bidirectional_concat, - bidirectional_sum = mkldnn_bidirectional_sum, -}; - -inline mkldnn_rnn_direction_t convert_to_c(rnn_direction adir) { - return static_cast(adir); -} - -enum query { - undef = mkldnn_query_undef, - - eengine = mkldnn_query_engine, - primitive_kind = mkldnn_query_primitive_kind, - - num_of_inputs_s32 = mkldnn_query_num_of_inputs_s32, - num_of_outputs_s32 = mkldnn_query_num_of_outputs_s32, - - time_estimate_f64 = mkldnn_query_time_estimate_f64, - memory_consumption_s64 = mkldnn_query_memory_consumption_s64, - - impl_info_str = mkldnn_query_impl_info_str, - - memory_d = mkldnn_query_memory_d, - convolution_d = mkldnn_query_convolution_d, - deconvolution_d = mkldnn_query_deconvolution_d, - eltwise_d = mkldnn_query_eltwise_d, - relu_d = mkldnn_query_relu_d, - softmax_d = mkldnn_query_softmax_d, - pooling_d = mkldnn_query_pooling_d, - lrn_d = mkldnn_query_lrn_d, - batch_normalization_d = mkldnn_query_batch_normalization_d, - inner_product_d = mkldnn_query_inner_product_d, - convolution_relu_d = mkldnn_query_convolution_relu_d, - rnn_d = mkldnn_query_rnn_d, - - input_pd = mkldnn_query_input_pd, - output_pd = mkldnn_query_output_pd, - src_pd = mkldnn_query_src_pd, - diff_src_pd = mkldnn_query_diff_src_pd, - weights_pd = mkldnn_query_weights_pd, - diff_weights_pd = mkldnn_query_diff_weights_pd, - dst_pd = mkldnn_query_dst_pd, - diff_dst_pd = mkldnn_query_diff_dst_pd, - workspace_pd = mkldnn_query_workspace_pd, -}; - -inline mkldnn_query_t convert_to_c(query aquery) { - return static_cast(aquery); -} - -/// @} - -/// @addtogroup cpp_api_attr Attributes -/// @{ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -template <> -struct handle_traits { - static constexpr auto destructor = &mkldnn_post_ops_destroy; -}; -#endif - -struct post_ops : public handle { - post_ops() { - mkldnn_post_ops_t result; - error::wrap_c_api(mkldnn_post_ops_create(&result), - "could not create post operation sequence"); - reset(result); - } - - int len() const { return mkldnn_post_ops_len(get()); } - - primitive::kind kind(int index) const { - 
error::wrap_c_api(index < len() ? mkldnn_success : mkldnn_invalid_arguments, - "post_ops index is out of range"); - return static_cast(mkldnn_post_ops_get_kind(get(), index)); - } - - void append_sum(float scale = 1.) { - error::wrap_c_api(mkldnn_post_ops_append_sum(get(), scale), - "could not append sum"); - } - - void get_params_sum(int index, float &scale) const { - error::wrap_c_api(mkldnn_post_ops_get_params_sum(get(), index, &scale), - "could not get sum params"); - } - - void append_eltwise(float scale, algorithm alg, float alpha, float beta) { - error::wrap_c_api(mkldnn_post_ops_append_eltwise( - get(), scale, convert_to_c(alg), alpha, beta), - "could not append eltwise"); - } - - void get_params_eltwise(int index, - float &scale, - algorithm &alg, - float &alpha, - float &beta) const { - mkldnn_alg_kind_t c_alg; - error::wrap_c_api(mkldnn_post_ops_get_params_eltwise( - get(), index, &scale, &c_alg, &alpha, &beta), - "could not get eltwise params"); - alg = static_cast(c_alg); - } -}; - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -template <> -struct handle_traits { - static constexpr auto destructor = &mkldnn_primitive_attr_destroy; -}; -#endif - -struct primitive_attr : public handle { - primitive_attr() { - mkldnn_primitive_attr_t result; - error::wrap_c_api(mkldnn_primitive_attr_create(&result), - "could not create a primitive attr"); - reset(result); - } - - round_mode get_int_output_round_mode() const { - mkldnn_round_mode_t result; - error::wrap_c_api( - mkldnn_primitive_attr_get_int_output_round_mode(get(), &result), - "could not get int output round mode"); - return round_mode(result); - } - - void set_int_output_round_mode(round_mode mode) { - error::wrap_c_api(mkldnn_primitive_attr_set_int_output_round_mode( - get(), mkldnn::convert_to_c(mode)), - "could not set int output round mode"); - } - - void get_output_scales(int &mask, std::vector &scales) const { - int count, c_mask; - const float *c_scales; - error::wrap_c_api(mkldnn_primitive_attr_get_output_scales( - get(), &count, &c_mask, &c_scales), - "could not get int output scales"); - scales.resize(count); - - mask = c_mask; - for (int c = 0; c < count; ++c) scales[c] = c_scales[c]; - } - - void set_output_scales(int mask, const std::vector &scales) { - error::wrap_c_api(mkldnn_primitive_attr_set_output_scales( - get(), (int)scales.size(), mask, &scales[0]), - "could not set int output scales"); - } - - const post_ops get_post_ops() const { - post_ops result; - const_mkldnn_post_ops_t c_result; - error::wrap_c_api(mkldnn_primitive_attr_get_post_ops(get(), &c_result), - "could not get post operation sequence"); - result.reset(const_cast(c_result), true); - return result; - } - - void set_post_ops(post_ops ops) { - error::wrap_c_api(mkldnn_primitive_attr_set_post_ops(get(), ops.get()), - "could not set post operation sequence"); - } -}; - -/// @} - -/// @addtogroup cpp_api_engine Engine -/// @{ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -template <> -struct handle_traits { - static constexpr auto destructor = &mkldnn_engine_destroy; -}; -#endif - -/// An execution engine. -struct engine : public handle { - friend class primitive; - // gcc bug??? using handle::handle; - - /// Kinds of engines - enum kind { - /// An unspecified engine - any = mkldnn_any_engine, - /// CPU engine - cpu = mkldnn_cpu, - }; - - /// Returns the number of engines of a certain kind. - /// - /// @param akind The kind of engines to count. 
-
-  static size_t get_count(kind akind) {
-    return mkldnn_engine_get_count(convert_to_c(akind));
-  }
-
-  /// Constructs an engine.
-  ///
-  /// @param akind The kind of engine to construct.
-  /// @param index The index of the engine. Must be less than the value
-  /// returned by #get_count() for this particular kind of engine.
-
-  engine(kind akind, size_t index) {
-    mkldnn_engine_t aengine;
-    error::wrap_c_api(
-        mkldnn_engine_create(&aengine, convert_to_c(akind), index),
-        "could not create an engine");
-    reset(aengine);
-  }
-
-  explicit engine(const mkldnn_engine_t &aengine) : handle(aengine, true) {}
-
-  engine(const handle<mkldnn_primitive_desc_t> &pd) {
-    mkldnn_engine_t engine_q;
-    error::wrap_c_api(
-        mkldnn_primitive_desc_query(
-            pd.get(), mkldnn::convert_to_c(eengine), 0, &engine_q),
-        "could not get engine from primitive_desc");
-    reset(engine_q, true);
-  }
-
-  template <class primitive_desc>
-  static engine query(const primitive_desc &pd) {
-    mkldnn_engine_t engine_q;
-    error::wrap_c_api(
-        mkldnn_primitive_desc_query(
-            pd.get(), mkldnn::convert_to_c(eengine), 0, &engine_q),
-        "could not get engine from primitive_desc");
-
-    return engine(engine_q);
-  }
-
-private:
-  static mkldnn_engine_kind_t convert_to_c(kind akind) {
-    return static_cast<mkldnn_engine_kind_t>(akind);
-  }
-};
-
-/// @}
-
-/// @addtogroup cpp_api_primitives Primitives
-/// @{
-
-/// @addtogroup cpp_api_memory Memory
-/// @{
-
-/// Memory primitive that describes the data.
-struct memory : public primitive {
-private:
-  std::shared_ptr<char> _handle;
-
-public:
-  typedef std::vector<std::remove_extent<mkldnn_dims_t>::type> dims;
-
-  template <typename T>
-  static void validate_dims(std::vector<T> v) {
-    if (v.size() > TENSOR_MAX_DIMS)
-      throw error(mkldnn_invalid_arguments, "invalid dimensions");
-  }
-
-  /// Data type specification. See #mkldnn_data_type_t for a detailed
-  /// description.
-  enum data_type {
-    data_undef = mkldnn_data_type_undef,
-    f32 = mkldnn_f32,
-    s32 = mkldnn_s32,
-    s16 = mkldnn_s16,
-    s8 = mkldnn_s8,
-    u8 = mkldnn_u8,
-  };
-
-  /// Memory format specification. See #mkldnn_memory_format_t
-  /// for a detailed description.
-  enum format {
-    format_undef = mkldnn_format_undef,
-    any = mkldnn_any,
-    blocked = mkldnn_blocked,
-    x = mkldnn_x,
-    nc = mkldnn_nc,
-    nchw = mkldnn_nchw,
-    nhwc = mkldnn_nhwc,
-    chwn = mkldnn_chwn,
-    nChw8c = mkldnn_nChw8c,
-    nChw16c = mkldnn_nChw16c,
-    ncdhw = mkldnn_ncdhw,
-    ndhwc = mkldnn_ndhwc,
-    nCdhw16c = mkldnn_nCdhw16c,
-    oi = mkldnn_oi,
-    io = mkldnn_io,
-    oihw = mkldnn_oihw,
-    ihwo = mkldnn_ihwo,
-    hwio = mkldnn_hwio,
-    oidhw = mkldnn_oidhw,
-    OIdhw16i16o = mkldnn_OIdhw16i16o,
-    OIdhw16o16i = mkldnn_OIdhw16o16i,
-    Oidhw16o = mkldnn_Oidhw16o,
-    Odhwi16o = mkldnn_Odhwi16o,
-    oIhw8i = mkldnn_oIhw8i,
-    oIhw16i = mkldnn_oIhw16i,
-    OIhw8i8o = mkldnn_OIhw8i8o,
-    OIhw16i16o = mkldnn_OIhw16i16o,
-    OIhw8o8i = mkldnn_OIhw8o8i,
-    OIhw16o16i = mkldnn_OIhw16o16i,
-    IOhw16o16i = mkldnn_IOhw16o16i,
-    OIhw8i16o2i = mkldnn_OIhw8i16o2i,
-    OIhw8o16i2o = mkldnn_OIhw8o16i2o,
-    OIhw4i16o4i = mkldnn_OIhw4i16o4i,
-    Oihw8o = mkldnn_Oihw8o,
-    Oihw16o = mkldnn_Oihw16o,
-    Ohwi8o = mkldnn_Ohwi8o,
-    Ohwi16o = mkldnn_Ohwi16o,
-    OhIw16o4i = mkldnn_OhIw16o4i,
-    goihw = mkldnn_goihw,
-    hwigo = mkldnn_hwigo,
-    gOIhw8i8o = mkldnn_gOIhw8i8o,
-    gOIhw16i16o = mkldnn_gOIhw16i16o,
-    gOIhw8i16o2i = mkldnn_gOIhw8i16o2i,
-    gOIhw8o16i2o = mkldnn_gOIhw8o16i2o,
-    gOIhw4i16o4i = mkldnn_gOIhw4i16o4i,
-    gOihw8o = mkldnn_gOihw8o,
-    gOihw16o = mkldnn_gOihw16o,
-    gOhwi8o = mkldnn_gOhwi8o,
-    gOhwi16o = mkldnn_gOhwi16o,
-    Goihw8g = mkldnn_Goihw8g,
-    Goihw16g = mkldnn_Goihw16g,
-    gOIhw8o8i = mkldnn_gOIhw8o8i,
-    gOIhw16o16i = mkldnn_gOIhw16o16i,
-    gIOhw16o16i = mkldnn_gIOhw16o16i,
-    gOhIw16o4i = mkldnn_gOhIw16o4i,
-    goidhw = mkldnn_goidhw,
-    gOIdhw16i16o = mkldnn_gOIdhw16i16o,
-    gOIdhw16o16i = mkldnn_gOIdhw16o16i,
-    gOidhw16o = mkldnn_gOidhw16o,
-    gOdhwi16o = mkldnn_gOdhwi16o,
-    ntc = mkldnn_ntc,
-    tnc = mkldnn_tnc,
-    ldsnc = mkldnn_ldsnc,
-    ldigo = mkldnn_ldigo,
-    ldigo_p = mkldnn_ldigo_p,
-    ldgoi = mkldnn_ldgoi,
-    ldgoi_p = mkldnn_ldgoi_p,
-    ldgo = mkldnn_ldgo,
-    wino_fmt = mkldnn_wino_fmt,
-    format_last = mkldnn_format_last,
-  };
-
-  /// A memory descriptor.
-  struct desc {
-    friend struct memory;
-    /// The underlying C API data structure.
-    mkldnn_memory_desc_t data;
-
-    /// Constructs a memory descriptor.
-    ///
-    /// @param adims Data dimensions
-    /// @param adata_type Data precision/type.
-    /// @param aformat Data layout format.
-    desc(dims adims, data_type adata_type, format aformat) {
-      validate_dims(adims);
-      error::wrap_c_api(
-          mkldnn_memory_desc_init(&data,
-                                  (int)adims.size(),
-                                  adims.size() == 0 ? nullptr : &adims[0],
-                                  convert_to_c(adata_type),
-                                  convert_to_c(aformat)),
-          "could not initialize a memory descriptor");
-    }
-
-    /// Constructs a memory descriptor from a C API data structure.
-    ///
-    /// @param adata A C API #mkldnn_memory_desc_t structure.
-    desc(const mkldnn_memory_desc_t &adata) : data(adata) {}
-  };
-
-  /// A memory primitive descriptor.
-  struct primitive_desc : public handle<mkldnn_primitive_desc_t> {
-    friend struct memory;
-
-    // TODO: make private
-    primitive_desc() {}
-
-    /// Constructs a memory primitive descriptor.
-    primitive_desc(const desc &adesc, const engine &aengine) {
-      mkldnn_primitive_desc_t result;
-      error::wrap_c_api(mkldnn_memory_primitive_desc_create(
-                            &result, &adesc.data, aengine.get()),
-                        "could not initialize a memory primitive descriptor");
-      reset(result);
-    }
-
-    /// Returns the memory primitive descriptor.
-    memory::desc desc() {
-      auto memory_d = mkldnn_primitive_desc_query_memory_d(get());
-      return memory::desc(*memory_d);
-    }
-
-    /// Returns the number of bytes required to allocate the memory described
-    /// including the padding area.
-    size_t get_size() const {
-      return mkldnn_memory_primitive_desc_get_size(get());
-    }
-
-    bool operator==(const primitive_desc &other) const {
-      return mkldnn_memory_primitive_desc_equal(get(), other.get());
-    }
-
-    bool operator!=(const primitive_desc &other) const {
-      return !operator==(other);
-    }
-
-    engine get_engine() { return engine::query(*this); }
-  };
-
-  /// Constructs a memory primitive from a generic primitive.
-  ///
-  /// @param aprimitive The primitive to treat as memory.
-  memory(const primitive &aprimitive) : primitive(aprimitive) {}
-  /// Constructs a memory primitive.
-  ///
-  /// @param adesc Memory primitive descriptor.
-  memory(const primitive_desc &adesc) {
-    mkldnn_primitive_t result;
-    error::wrap_c_api(
-        mkldnn_primitive_create(&result, adesc.get(), nullptr, nullptr),
-        "could not create a memory primitive");
-    reset(result);
-    auto _malloc = [](size_t size, int alignment) {
-      void *ptr;
-#ifdef _WIN32
-      ptr = _aligned_malloc(size, alignment);
-      int rc = ((ptr) ? 0 : errno);
-#else
-      int rc = ::posix_memalign(&ptr, alignment, size);
-#endif /* _WIN32 */
-      return (rc == 0) ? (char *)ptr : nullptr;
-    };
-    auto _free = [](char *p) {
-#ifdef _WIN32
-      _aligned_free((void *)p);
-#else
-      ::free((void *)p);
-#endif /* _WIN32 */
-    };
-    _handle.reset(_malloc(adesc.get_size(), 4096), _free);
-    set_data_handle(_handle.get());
-  }
-
-  memory(const primitive_desc &adesc, void *ahandle) {
-    mkldnn_primitive_t result;
-    error::wrap_c_api(
-        mkldnn_primitive_create(&result, adesc.get(), nullptr, nullptr),
-        "could not create a memory primitive");
-    reset(result);
-    set_data_handle(ahandle);
-  }
-
-  /// Returns the descriptor of the memory primitive.
-  primitive_desc get_primitive_desc() const {
-    primitive_desc adesc;
-    const_mkldnn_primitive_desc_t cdesc;
-    error::wrap_c_api(
-        mkldnn_primitive_get_primitive_desc(get(), &cdesc),
-        "could not get primitive descriptor from a memory primitive");
-    /* FIXME: no const_cast should be here */
-    adesc.reset(const_cast<mkldnn_primitive_desc_t>(cdesc), true);
-    return adesc;
-  }
-
-  /// Returns a handle of the data contained in the memory primitive. On
-  /// the CPU engine, this is a pointer to the allocated memory.
-  inline void *get_data_handle() const {
-    void *handle;
-    error::wrap_c_api(mkldnn_memory_get_data_handle(get(), &handle),
-                      "could not get native handle");
-    return handle;
-  }
-
-  inline void set_data_handle(void *handle) const {
-    error::wrap_c_api(mkldnn_memory_set_data_handle(get(), handle),
-                      "could not set native handle");
-  }
-
-  // Must go away or be private:
-  static mkldnn_data_type_t convert_to_c(data_type adata_type) {
-    return static_cast<mkldnn_data_type_t>(adata_type);
-  }
-  static mkldnn_memory_format_t convert_to_c(format aformat) {
-    return static_cast<mkldnn_memory_format_t>(aformat);
-  }
-};
-
-inline memory::desc zero_md() {
-  mkldnn_memory_desc_t zero;
-  zero.primitive_kind = mkldnn_memory;
-  return memory::desc(zero);
-}
-
-inline memory null_memory(engine eng) {
-  mkldnn::memory::desc zero = zero_md();
-  return memory({zero, eng}, nullptr);
-}
-
-inline bool is_null_memory(const const_mkldnn_primitive_t &aprimitive) {
-  const_mkldnn_primitive_desc_t aprimitive_pd;
-  mkldnn_primitive_get_primitive_desc(aprimitive, &aprimitive_pd);
-  const mkldnn_memory_desc_t *aprimitive_md =
-      mkldnn_primitive_desc_query_memory_d(aprimitive_pd);
-
-  return ((aprimitive_md != nullptr) && (aprimitive_md->ndims == 0));
-}
-
-inline bool operator==(mkldnn_data_type_t a, memory::data_type b) {
-  return a == memory::convert_to_c(b);
-}
-inline bool operator!=(mkldnn_data_type_t a, memory::data_type b) {
-  return !(a == b);
-}
-inline bool operator==(memory::data_type a, mkldnn_data_type_t b) {
-  return b == a;
-}
-inline bool operator!=(memory::data_type a, mkldnn_data_type_t b) {
-  return !(a == b);
-}
-
-inline bool operator==(mkldnn_memory_format_t a, memory::format b) {
-  return a == memory::convert_to_c(b);
-}
-inline bool operator!=(mkldnn_memory_format_t a, memory::format b) {
-  return !(a == b);
-}
-inline bool operator==(memory::format a, mkldnn_memory_format_t b) {
-  return b == a;
-}
-inline bool operator!=(memory::format a, mkldnn_memory_format_t b) {
-  return !(a == b);
-}
-
-/// @}
-
-/// @addtogroup cpp_api_reorder Reorder
-/// @{
-
-struct reorder : public primitive {
-  struct primitive_desc : public handle<mkldnn_primitive_desc_t> {
-    primitive_desc(const memory::primitive_desc &input,
-                   const memory::primitive_desc &output) {
-      mkldnn_primitive_desc_t result;
-      error::wrap_c_api(mkldnn_reorder_primitive_desc_create(
-                            &result, input.get(), output.get()),
-                        "could not create a reorder primitive descriptor");
-      reset(result);
-    }
-
-    primitive_desc(const memory::primitive_desc &input,
-                   const memory::primitive_desc &output,
-                   const primitive_attr &aattr) {
-      mkldnn_primitive_desc_t result;
-      error::wrap_c_api(mkldnn_reorder_primitive_desc_create_v2(
-                            &result, input.get(), output.get(), aattr.get()),
-                        "could not create a reorder primitive descriptor");
-      reset(result);
-    }
-
-    engine get_engine() { return engine::query(*this); }
-  };
-
-  reorder(const primitive_desc &aprimitive_desc,
-          const primitive::at &input,
-          const memory &output) {
-    mkldnn_primitive_t result;
-    mkldnn_primitive_at_t inputs[] = {input.data};
-    const_mkldnn_primitive_t outputs[] = {output.get()};
-    error::wrap_c_api(mkldnn_primitive_create(
-                          &result, aprimitive_desc.get(), inputs, outputs),
-                      "could not create a reorder primitive");
-    reset(result);
-  }
-
-  reorder(const primitive::at &input, const memory &output) {
-    auto input_mpd = memory(input).get_primitive_desc();
-    auto output_mpd = output.get_primitive_desc();
-
-    auto reorder_d = primitive_desc(input_mpd, output_mpd);
-
-    mkldnn_primitive_t result;
-    mkldnn_primitive_at_t inputs[] = {input.data};
const_mkldnn_primitive_t outputs[] = {output.get()}; - error::wrap_c_api( - mkldnn_primitive_create(&result, reorder_d.get(), inputs, outputs), - "could not create a reorder primitive"); - reset(result); - } -}; - -/// @} - -/// @addtogroup cpp_api_view View -/// @{ - -struct view : public primitive { - struct primitive_desc : public handle { - primitive_desc(const memory::primitive_desc &input, - memory::dims dims, - memory::dims offsets) { - mkldnn_primitive_desc_t result; - - error::wrap_c_api(mkldnn_view_primitive_desc_create( - &result, input.get(), &dims[0], &offsets[0]), - "could not create a view primitive descriptor"); - reset(result); - } - - memory::primitive_desc dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - view(const primitive_desc &view_pd, primitive::at input) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {input.data}; - error::wrap_c_api( - mkldnn_primitive_create(&result, view_pd.get(), inputs, nullptr), - "could not create a view primitive"); - reset(result); - } - - view(memory input, memory::dims dims, memory::dims offsets) { - mkldnn_primitive_t result; - primitive_desc view_pd(input.get_primitive_desc(), dims, offsets); - mkldnn_primitive_at_t inputs[] = {primitive::at(input).data}; - error::wrap_c_api( - mkldnn_primitive_create(&result, view_pd.get(), inputs, nullptr), - "could not create a view primitive"); - reset(result); - } -}; - -/// @} - -/// @addtogroup cpp_api_concat Concat -/// @{ - -struct concat : public primitive { - struct primitive_desc : public handle { - std::vector cpp_to_c( - std::vector inputs) { - std::vector c_api_inputs; - c_api_inputs.reserve(inputs.size()); - auto convert_to_c = [](memory::primitive_desc d) { return d.get(); }; - std::transform(inputs.begin(), - inputs.end(), - std::back_inserter(c_api_inputs), - convert_to_c); - return c_api_inputs; - } - - primitive_desc(const memory::desc &output, - int concat_dimension, - std::vector inputs) { - mkldnn_primitive_desc_t result; - - auto c_api_inputs = cpp_to_c(inputs); - - error::wrap_c_api( - mkldnn_concat_primitive_desc_create(&result, - &output.data, - (int)c_api_inputs.size(), - concat_dimension, - &c_api_inputs[0]), - "could not create a concat primitive descriptor"); - reset(result); - } - - primitive_desc(int concat_dimension, - std::vector inputs) { - mkldnn_primitive_desc_t result; - - auto c_api_inputs = cpp_to_c(inputs); - - error::wrap_c_api( - mkldnn_concat_primitive_desc_create(&result, - nullptr, - (int)c_api_inputs.size(), - concat_dimension, - &c_api_inputs[0]), - "could not create a concat primitive descriptor"); - reset(result); - } - - memory::primitive_desc dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - concat(const primitive_desc &concat_pd, - std::vector &inputs, - 
const memory &output) { - mkldnn_primitive_t result; - - std::vector p_inputs; - for (size_t i = 0; i < inputs.size(); i++) - p_inputs.push_back(inputs[i].data); - const_mkldnn_primitive_t outputs[] = {output.get()}; - - error::wrap_c_api(mkldnn_primitive_create( - &result, concat_pd.get(), &p_inputs[0], outputs), - "could not create a concat primitive"); - reset(result); - } -}; - -/// @} - -/// @addtogroup cpp_api_sum Sum -/// @{ - -struct sum : public primitive { - struct primitive_desc : public handle { - std::vector cpp_to_c( - std::vector inputs) { - std::vector c_api_inputs; - c_api_inputs.reserve(inputs.size()); - auto convert_to_c = [](memory::primitive_desc d) { return d.get(); }; - std::transform(inputs.begin(), - inputs.end(), - std::back_inserter(c_api_inputs), - convert_to_c); - return c_api_inputs; - } - - primitive_desc(const memory::desc &output, - const std::vector &scales, - std::vector inputs) { - mkldnn_primitive_desc_t result; - - auto c_api_inputs = cpp_to_c(inputs); - - error::wrap_c_api( - mkldnn_sum_primitive_desc_create(&result, - &output.data, - (int)c_api_inputs.size(), - &scales[0], - &c_api_inputs[0]), - "could not create a sum primitive descriptor"); - reset(result); - } - - primitive_desc(const std::vector &scales, - std::vector inputs) { - mkldnn_primitive_desc_t result; - - auto c_api_inputs = cpp_to_c(inputs); - - error::wrap_c_api( - mkldnn_sum_primitive_desc_create(&result, - nullptr, - (int)c_api_inputs.size(), - &scales[0], - &c_api_inputs[0]), - "could not create a sum primitive descriptor"); - reset(result); - } - - /** @deprecated: api backwards compatibility for double scales type */ - MKLDNN_DEPRECATED - primitive_desc(const memory::desc &output, - std::vector scale, - std::vector inputs) { - mkldnn_primitive_desc_t result; - - auto c_api_inputs = cpp_to_c(inputs); - auto scale_f = scale_to_float(scale); - - error::wrap_c_api( - mkldnn_sum_primitive_desc_create(&result, - &output.data, - (int)c_api_inputs.size(), - &scale_f[0], - &c_api_inputs[0]), - "could not create a sum primitive descriptor"); - reset(result); - } - - /** @deprecated: api backwards compatibility for double scales type */ - MKLDNN_DEPRECATED - primitive_desc(std::vector scale, - std::vector inputs) { - mkldnn_primitive_desc_t result; - - auto c_api_inputs = cpp_to_c(inputs); - auto scale_f = scale_to_float(scale); - - error::wrap_c_api( - mkldnn_sum_primitive_desc_create(&result, - nullptr, - (int)c_api_inputs.size(), - &scale_f[0], - &c_api_inputs[0]), - "could not create a sum primitive descriptor"); - reset(result); - } - - memory::primitive_desc dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - sum(const primitive_desc &sum_pd, - std::vector &inputs, - const memory &output) { - mkldnn_primitive_t result; - - std::vector p_inputs; - for (size_t i = 0; i < inputs.size(); i++) - p_inputs.push_back(inputs[i].data); - const_mkldnn_primitive_t outputs[] = {output.get()}; - - error::wrap_c_api( - mkldnn_primitive_create(&result, sum_pd.get(), &p_inputs[0], outputs), - "could not create a sum primitive"); - reset(result); - } - -private: - static std::vector scale_to_float(const std::vector 
&vd) { - std::vector vf(vd.size()); - std::transform( - vd.begin(), vd.end(), vf.begin(), [=](double x) { return (float)x; }); - return vf; - } -}; - -/// @} - -/// @addtogroup cpp_api_convolution Convolution -/// @{ - -struct convolution_forward : public primitive { - struct desc { - mkldnn_convolution_desc_t data; - desc(prop_kind aprop_kind, - algorithm aalgorithm, - const memory::desc &src_desc, - const memory::desc &weights_desc, - const memory::desc &bias_desc, - const memory::desc &dst_desc, - const memory::dims strides, - const memory::dims padding_l, - const memory::dims padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - error::wrap_c_api(mkldnn_convolution_forward_desc_init( - &data, - mkldnn::convert_to_c(aprop_kind), - convert_to_c(aalgorithm), - &src_desc.data, - &weights_desc.data, - &bias_desc.data, - &dst_desc.data, - &strides[0], - &padding_l[0], - &padding_r[0], - mkldnn::convert_to_c(apadding_kind)), - "could not create a convolution forward descriptor"); - } - desc(prop_kind aprop_kind, - algorithm aalgorithm, - const memory::desc &src_desc, - const memory::desc &weights_desc, - const memory::desc &dst_desc, - const memory::dims strides, - const memory::dims padding_l, - const memory::dims padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - error::wrap_c_api(mkldnn_convolution_forward_desc_init( - &data, - mkldnn::convert_to_c(aprop_kind), - convert_to_c(aalgorithm), - &src_desc.data, - &weights_desc.data, - nullptr, - &dst_desc.data, - &strides[0], - &padding_l[0], - &padding_r[0], - mkldnn::convert_to_c(apadding_kind)), - "could not create a convolution forward descriptor"); - } - desc(prop_kind aprop_kind, - algorithm aalgorithm, - const memory::desc &src_desc, - const memory::desc &weights_desc, - const memory::desc &bias_desc, - const memory::desc &dst_desc, - const memory::dims strides, - const memory::dims dilates, - const memory::dims padding_l, - const memory::dims padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(dilates); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - error::wrap_c_api( - mkldnn_dilated_convolution_forward_desc_init( - &data, - mkldnn::convert_to_c(aprop_kind), - convert_to_c(aalgorithm), - &src_desc.data, - &weights_desc.data, - &bias_desc.data, - &dst_desc.data, - &strides[0], - &dilates[0], - &padding_l[0], - &padding_r[0], - mkldnn::convert_to_c(apadding_kind)), - "could not create a dilated convolution forward descriptor"); - } - desc(prop_kind aprop_kind, - algorithm aalgorithm, - const memory::desc &src_desc, - const memory::desc &weights_desc, - const memory::desc &dst_desc, - const memory::dims strides, - const memory::dims dilates, - const memory::dims padding_l, - const memory::dims padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(dilates); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - error::wrap_c_api( - mkldnn_dilated_convolution_forward_desc_init( - &data, - mkldnn::convert_to_c(aprop_kind), - convert_to_c(aalgorithm), - &src_desc.data, - &weights_desc.data, - nullptr, - &dst_desc.data, - &strides[0], - &dilates[0], - &padding_l[0], - &padding_r[0], - mkldnn::convert_to_c(apadding_kind)), - "could not create a dilated convolution forward descriptor"); - 
} - }; - struct primitive_desc : public handle { - primitive_desc(const desc &adesc, const engine &aengine) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create( - &result, &adesc.data, aengine.get(), nullptr), - "could not create a convolution forward primitive descriptor"); - reset(result); - } - - primitive_desc(const desc &adesc, - const primitive_attr &aattr, - const engine &aengine) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create_v2( - &result, &adesc.data, aattr.get(), aengine.get(), nullptr), - "could not create a convolution forward primitive descriptor"); - reset(result); - } - - memory::primitive_desc src_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(src_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a src primititve descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc weights_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a weights primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc bias_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 1); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a bias primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - convolution_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &weights, - const primitive::at &bias, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, weights.data, bias.data}; - const_mkldnn_primitive_t outputs[] = {dst.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a convolution forward bias primitive"); - reset(result); - } - - convolution_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &weights, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, weights.data}; - const_mkldnn_primitive_t outputs[] = {dst.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a convolution forward primitive"); - reset(result); - } -}; - -struct convolution_backward_data : public primitive { - struct desc { - mkldnn_convolution_desc_t data; - desc(algorithm aalgorithm, - const memory::desc &diff_src_desc, - const memory::desc &weights_desc, - 
const memory::desc &diff_dst_desc, - const memory::dims strides, - const memory::dims padding_l, - const memory::dims padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - error::wrap_c_api( - mkldnn_convolution_backward_data_desc_init( - &data, - convert_to_c(aalgorithm), - &diff_src_desc.data, - &weights_desc.data, - &diff_dst_desc.data, - &strides[0], - &padding_l[0], - &padding_r[0], - mkldnn::convert_to_c(apadding_kind)), - "could not create a convolution backward data descriptor"); - } - desc(algorithm aalgorithm, - const memory::desc &diff_src_desc, - const memory::desc &weights_desc, - const memory::desc &diff_dst_desc, - const memory::dims strides, - const memory::dims dilates, - const memory::dims padding_l, - const memory::dims padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(dilates); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - error::wrap_c_api( - mkldnn_dilated_convolution_backward_data_desc_init( - &data, - convert_to_c(aalgorithm), - &diff_src_desc.data, - &weights_desc.data, - &diff_dst_desc.data, - &strides[0], - &dilates[0], - &padding_l[0], - &padding_r[0], - mkldnn::convert_to_c(apadding_kind)), - "could not create a convolution backward data descriptor"); - } - }; - struct primitive_desc : public handle { - primitive_desc( - const desc &adesc, - const engine &aengine, - const convolution_forward::primitive_desc &hint_fwd_primitive_desc) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create(&result, - &adesc.data, - aengine.get(), - hint_fwd_primitive_desc.get()), - "could not create a convolution backward data primitive descriptor"); - reset(result); - } - memory::primitive_desc diff_src_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_src_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff_src primititve descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc weights_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a weights primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff_dst primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - convolution_backward_data(const primitive_desc &aprimitive_desc, - const primitive::at &diff_dst, - const primitive::at &weights, - const memory &diff_src) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {diff_dst.data, weights.data}; - const_mkldnn_primitive_t outputs[] = {diff_src.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, 
outputs), - "could not create a convolution backward data primitive"); - reset(result); - } -}; - -struct convolution_backward_weights : public primitive { - struct desc { - mkldnn_convolution_desc_t data; - desc(algorithm aalgorithm, - const memory::desc &src_desc, - const memory::desc &diff_weights_desc, - const memory::desc &diff_bias_desc, - const memory::desc &diff_dst_desc, - const memory::dims strides, - const memory::dims padding_l, - const memory::dims padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - error::wrap_c_api( - mkldnn_convolution_backward_weights_desc_init( - &data, - convert_to_c(aalgorithm), - &src_desc.data, - &diff_weights_desc.data, - &diff_bias_desc.data, - &diff_dst_desc.data, - &strides[0], - &padding_l[0], - &padding_r[0], - mkldnn::convert_to_c(apadding_kind)), - "could not create a convolution backward weights descriptor"); - } - desc(algorithm aalgorithm, - const memory::desc &src_desc, - const memory::desc &diff_weights_desc, - const memory::desc &diff_dst_desc, - const memory::dims strides, - const memory::dims padding_l, - const memory::dims padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - error::wrap_c_api( - mkldnn_convolution_backward_weights_desc_init( - &data, - convert_to_c(aalgorithm), - &src_desc.data, - &diff_weights_desc.data, - nullptr, - &diff_dst_desc.data, - &strides[0], - &padding_l[0], - &padding_r[0], - mkldnn::convert_to_c(apadding_kind)), - "could not create a convolution backward weights descriptor"); - } - desc(algorithm aalgorithm, - const memory::desc &src_desc, - const memory::desc &diff_weights_desc, - const memory::desc &diff_bias_desc, - const memory::desc &diff_dst_desc, - const memory::dims strides, - const memory::dims dilates, - const memory::dims padding_l, - const memory::dims padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(dilates); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - error::wrap_c_api( - mkldnn_dilated_convolution_backward_weights_desc_init( - &data, - convert_to_c(aalgorithm), - &src_desc.data, - &diff_weights_desc.data, - &diff_bias_desc.data, - &diff_dst_desc.data, - &strides[0], - &dilates[0], - &padding_l[0], - &padding_r[0], - mkldnn::convert_to_c(apadding_kind)), - "could not create a convolution backward weights descriptor"); - } - desc(algorithm aalgorithm, - const memory::desc &src_desc, - const memory::desc &diff_weights_desc, - const memory::desc &diff_dst_desc, - const memory::dims strides, - const memory::dims dilates, - const memory::dims padding_l, - const memory::dims padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(dilates); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - error::wrap_c_api( - mkldnn_dilated_convolution_backward_weights_desc_init( - &data, - convert_to_c(aalgorithm), - &src_desc.data, - &diff_weights_desc.data, - nullptr, - &diff_dst_desc.data, - &strides[0], - &dilates[0], - &padding_l[0], - &padding_r[0], - mkldnn::convert_to_c(apadding_kind)), - "could not create a convolution backward weights descriptor"); - } - }; - - struct primitive_desc : public handle { - primitive_desc( - const desc &adesc, - const engine &aengine, - const convolution_forward::primitive_desc &hint_fwd_primitive_desc) { - 
mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create(&result, - &adesc.data, - aengine.get(), - hint_fwd_primitive_desc.get()), - "could not create a convolution backward weights primitive " - "descriptor"); - reset(result); - } - memory::primitive_desc src_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(src_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a src primititve descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_weights_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_weights_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff_weights primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_bias_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_weights_pd), 1); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff_bias primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff_dst primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - convolution_backward_weights(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &diff_dst, - const memory &diff_weights, - const memory &diff_bias) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, diff_dst.data}; - const_mkldnn_primitive_t outputs[] = {diff_weights.get(), diff_bias.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a convolution backward weights primitive"); - reset(result); - } - convolution_backward_weights(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &diff_dst, - const memory &diff_weights) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, diff_dst.data}; - const_mkldnn_primitive_t outputs[] = {diff_weights.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a convolution backward weights primitive"); - reset(result); - } -}; - -struct convolution_relu_forward : public primitive { - struct desc { - mkldnn_convolution_relu_desc_t data; - desc(const convolution_forward::desc conv_desc, - const float negative_slope) { - error::wrap_c_api( - mkldnn_convolution_relu_desc_init( - &data, &conv_desc.data, negative_slope), - "could not create a convolution_relu_forward descriptor"); - } - }; - - struct primitive_desc : public handle { - primitive_desc(const desc &adesc, const engine &aengine) { - mkldnn_primitive_desc_t 
result; - error::wrap_c_api( - mkldnn_primitive_desc_create( - &result, &adesc.data, aengine.get(), nullptr), - "could not create a convolution relu forward descriptor"); - reset(result); - } - - engine get_engine() { return engine::query(*this); } - }; - - convolution_relu_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &weights, - const primitive::at &bias, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, weights.data, bias.data}; - const_mkldnn_primitive_t outputs[] = {dst.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a convolution relu forward primitive"); - reset(result); - } - - convolution_relu_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &weights, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, weights.data}; - const_mkldnn_primitive_t outputs[] = {dst.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a convolution relu forward primitive"); - reset(result); - } -}; - -/// @} -// -/// @addtogroup cpp_api_deconvolution Deconvolution -/// @{ - -struct deconvolution_forward : public primitive { - struct desc { - mkldnn_deconvolution_desc_t data; - desc(prop_kind aprop_kind, - algorithm aalgorithm, - const memory::desc &src_desc, - const memory::desc &weights_desc, - const memory::desc &bias_desc, - const memory::desc &dst_desc, - const memory::dims strides, - const memory::dims padding_l, - const memory::dims padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - error::wrap_c_api(mkldnn_deconvolution_forward_desc_init( - &data, - mkldnn::convert_to_c(aprop_kind), - convert_to_c(aalgorithm), - &src_desc.data, - &weights_desc.data, - &bias_desc.data, - &dst_desc.data, - &strides[0], - &padding_l[0], - &padding_r[0], - mkldnn::convert_to_c(apadding_kind)), - "could not create a deconvolution forward descriptor"); - } - desc(prop_kind aprop_kind, - algorithm aalgorithm, - const memory::desc &src_desc, - const memory::desc &weights_desc, - const memory::desc &dst_desc, - const memory::dims strides, - const memory::dims padding_l, - const memory::dims padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - error::wrap_c_api(mkldnn_deconvolution_forward_desc_init( - &data, - mkldnn::convert_to_c(aprop_kind), - convert_to_c(aalgorithm), - &src_desc.data, - &weights_desc.data, - nullptr, - &dst_desc.data, - &strides[0], - &padding_l[0], - &padding_r[0], - mkldnn::convert_to_c(apadding_kind)), - "could not create a deconvolution forward descriptor"); - } - }; - struct primitive_desc : public handle { - primitive_desc(const desc &adesc, const engine &aengine) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create( - &result, &adesc.data, aengine.get(), nullptr), - "could not create a deconvolution forward primitive descriptor"); - reset(result); - } - - primitive_desc(const desc &adesc, - const primitive_attr &aattr, - const engine &aengine) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create_v2( - &result, &adesc.data, aattr.get(), aengine.get(), nullptr), - "could not create a 
deconvolution forward primitive descriptor"); - reset(result); - } - - memory::primitive_desc src_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(src_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a src primititve descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc weights_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a weights primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc bias_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 1); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a bias primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - deconvolution_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &weights, - const primitive::at &bias, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, weights.data, bias.data}; - const_mkldnn_primitive_t outputs[] = {dst.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a deconvolution forward bias primitive"); - reset(result); - } - - deconvolution_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &weights, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, weights.data}; - const_mkldnn_primitive_t outputs[] = {dst.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a deconvolution forward primitive"); - reset(result); - } -}; - -struct deconvolution_backward_data : public primitive { - struct desc { - mkldnn_deconvolution_desc_t data; - desc(algorithm aalgorithm, - const memory::desc &diff_src_desc, - const memory::desc &weights_desc, - const memory::desc &diff_dst_desc, - const memory::dims strides, - const memory::dims padding_l, - const memory::dims padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - error::wrap_c_api( - mkldnn_deconvolution_backward_data_desc_init( - &data, - convert_to_c(aalgorithm), - &diff_src_desc.data, - &weights_desc.data, - &diff_dst_desc.data, - &strides[0], - &padding_l[0], - &padding_r[0], - mkldnn::convert_to_c(apadding_kind)), - "could not create a deconvolution backward data 
descriptor"); - } - }; - struct primitive_desc : public handle { - primitive_desc( - const desc &adesc, - const engine &aengine, - const deconvolution_forward::primitive_desc &hint_fwd_primitive_desc) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create(&result, - &adesc.data, - aengine.get(), - hint_fwd_primitive_desc.get()), - "could not create a deconvolution backward data primitive " - "descriptor"); - reset(result); - } - memory::primitive_desc diff_src_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_src_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff_src primititve descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc weights_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a weights primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff_dst primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - deconvolution_backward_data(const primitive_desc &aprimitive_desc, - const primitive::at &diff_dst, - const primitive::at &weights, - const memory &diff_src) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {diff_dst.data, weights.data}; - const_mkldnn_primitive_t outputs[] = {diff_src.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a deconvolution backward data primitive"); - reset(result); - } -}; - -struct deconvolution_backward_weights : public primitive { - struct desc { - mkldnn_deconvolution_desc_t data; - desc(algorithm aalgorithm, - const memory::desc &src_desc, - const memory::desc &diff_weights_desc, - const memory::desc &diff_bias_desc, - const memory::desc &diff_dst_desc, - const memory::dims strides, - const memory::dims padding_l, - const memory::dims padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - error::wrap_c_api( - mkldnn_deconvolution_backward_weights_desc_init( - &data, - convert_to_c(aalgorithm), - &src_desc.data, - &diff_weights_desc.data, - &diff_bias_desc.data, - &diff_dst_desc.data, - &strides[0], - &padding_l[0], - &padding_r[0], - mkldnn::convert_to_c(apadding_kind)), - "could not create a deconvolution backward weights descriptor"); - } - desc(algorithm aalgorithm, - const memory::desc &src_desc, - const memory::desc &diff_weights_desc, - const memory::desc &diff_dst_desc, - const memory::dims strides, - const memory::dims padding_l, - const memory::dims padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - 
error::wrap_c_api( - mkldnn_deconvolution_backward_weights_desc_init( - &data, - convert_to_c(aalgorithm), - &src_desc.data, - &diff_weights_desc.data, - nullptr, - &diff_dst_desc.data, - &strides[0], - &padding_l[0], - &padding_r[0], - mkldnn::convert_to_c(apadding_kind)), - "could not create a deconvolution backward weights descriptor"); - } - }; - - struct primitive_desc : public handle { - primitive_desc( - const desc &adesc, - const engine &aengine, - const deconvolution_forward::primitive_desc &hint_fwd_primitive_desc) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create(&result, - &adesc.data, - aengine.get(), - hint_fwd_primitive_desc.get()), - "could not create a deconvolution backward weights primitive " - "descriptor"); - reset(result); - } - memory::primitive_desc src_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(src_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a src primititve descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_weights_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_weights_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff_weights primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_bias_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_weights_pd), 1); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff_bias primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff_dst primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - deconvolution_backward_weights(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &diff_dst, - const memory &diff_weights, - const memory &diff_bias) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, diff_dst.data}; - const_mkldnn_primitive_t outputs[] = {diff_weights.get(), diff_bias.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a deconvolution backward weights primitive"); - reset(result); - } - deconvolution_backward_weights(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &diff_dst, - const memory &diff_weights) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, diff_dst.data}; - const_mkldnn_primitive_t outputs[] = {diff_weights.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a deconvolution backward 
weights primitive"); - reset(result); - } -}; - -/// @} - -/// @addtogroup cpp_api_lrn LRN -/// @{ - -struct lrn_forward : public primitive { - struct desc { - mkldnn_lrn_desc_t data; - desc(prop_kind aprop_kind, - algorithm aalgorithm, - const memory::desc &src_desc, - int local_size, - float alpha, - float beta, - float k) { - error::wrap_c_api( - mkldnn_lrn_forward_desc_init(&data, - mkldnn::convert_to_c(aprop_kind), - convert_to_c(aalgorithm), - &src_desc.data, - local_size, - alpha, - beta, - k), - "could not create a lrn forward descriptor"); - } - desc(prop_kind aprop_kind, - algorithm aalgorithm, - const memory::desc &src_desc, - int local_size, - float alpha, - float beta) { - error::wrap_c_api( - mkldnn_lrn_forward_desc_init(&data, - mkldnn::convert_to_c(aprop_kind), - convert_to_c(aalgorithm), - &src_desc.data, - local_size, - alpha, - beta, - float(1.0)), - "could not create a lrn forward descriptor"); - } - }; - - struct primitive_desc : public handle { - primitive_desc(const desc &adesc, const engine &aengine) { - mkldnn_primitive_desc_t result; - error::wrap_c_api(mkldnn_primitive_desc_create( - &result, &adesc.data, aengine.get(), nullptr), - "could not create a lrn forward primitive descriptor"); - reset(result); - } - - memory::primitive_desc src_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(src_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a src primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc workspace_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t ldesc; - const_mkldnn_primitive_desc_t const_ldesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(workspace_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&ldesc, const_ldesc), - "could not clone a workspace primitive descriptor"); - adesc.reset(ldesc); - return adesc; - } - - memory::primitive_desc dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - lrn_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const memory &workspace, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data}; - const_mkldnn_primitive_t outputs[] = {dst.get(), workspace.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a lrn forward primitive"); - reset(result); - } - - lrn_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data}; - const_mkldnn_primitive_t outputs[] = {dst.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a lrn forward primitive"); - reset(result); - } -}; - -struct lrn_backward : public primitive { - struct desc { - mkldnn_lrn_desc_t data; - desc(algorithm aalgorithm, - const memory::desc &data_desc, - const 
memory::desc &diff_data_desc, - int local_size, - float alpha, - float beta, - float k) { - error::wrap_c_api(mkldnn_lrn_backward_desc_init(&data, - convert_to_c(aalgorithm), - &diff_data_desc.data, - &data_desc.data, - local_size, - alpha, - beta, - k), - "could not create a lrn backward descriptor"); - } - desc(algorithm aalgorithm, - const memory::desc &data_desc, - const memory::desc &diff_data_desc, - int local_size, - float alpha, - float beta) { - error::wrap_c_api(mkldnn_lrn_backward_desc_init(&data, - convert_to_c(aalgorithm), - &diff_data_desc.data, - &data_desc.data, - local_size, - alpha, - beta, - float(1.0)), - "could not create a lrn backward descriptor"); - } - }; - - struct primitive_desc : public handle { - primitive_desc(const desc &adesc, - const engine &aengine, - const lrn_forward::primitive_desc &hint_fwd_primitive_desc) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create(&result, - &adesc.data, - aengine.get(), - hint_fwd_primitive_desc.get()), - "could not create a backward lrn primitive descriptor"); - reset(result); - } - - memory::primitive_desc diff_src_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_src_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff_src primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc workspace_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t ldesc; - const_mkldnn_primitive_desc_t const_ldesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(workspace_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&ldesc, const_ldesc), - "could not clone a workspace primitive descriptor"); - adesc.reset(ldesc); - return adesc; - } - - memory::primitive_desc diff_dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff_dst primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - lrn_backward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &diff_dst, - const primitive::at &workspace, - const memory &diff_src) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, diff_dst.data, workspace.data}; - const_mkldnn_primitive_t outputs[] = {diff_src.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a lrn backward primitive"); - reset(result); - } - - lrn_backward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &diff_dst, - const memory &diff_src) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, diff_dst.data}; - const_mkldnn_primitive_t outputs[] = {diff_src.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a lrn backward primitive"); - reset(result); - } -}; - -/// @} - -/// @addtogroup cpp_api_pooling Pooling -/// @{ - -struct pooling_forward : public primitive { - struct desc { - mkldnn_pooling_desc_t data; - 
desc(prop_kind aprop_kind, - algorithm aalgorithm, - const memory::desc &src_desc, - const memory::desc &dst_desc, - const memory::dims strides, - const memory::dims kernel, - const memory::dims padding_l, - const memory::dims padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(kernel); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - error::wrap_c_api( - mkldnn_pooling_forward_desc_init(&data, - mkldnn::convert_to_c(aprop_kind), - convert_to_c(aalgorithm), - &src_desc.data, - &dst_desc.data, - &strides[0], - &kernel[0], - &padding_l[0], - &padding_r[0], - mkldnn::convert_to_c(apadding_kind)), - "could not init a forward pooling descriptor"); - } - }; - - struct primitive_desc : public handle { - primitive_desc(const desc &adesc, const engine &aengine) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create( - &result, &adesc.data, aengine.get(), nullptr), - "could not create a forward pooling primitive descriptor"); - reset(result); - } - - memory::primitive_desc workspace_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(workspace_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a workspace primititve descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - pooling_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data}; - const_mkldnn_primitive_t outputs[] = {dst.get(), nullptr}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a pooling forward primitive"); - reset(result); - } - - pooling_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const memory &dst, - const memory &workspace) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data}; - const_mkldnn_primitive_t outputs[] = {dst.get(), workspace.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a pooling forward primitive"); - reset(result); - } -}; - -struct pooling_backward : public primitive { - struct desc { - mkldnn_pooling_desc_t data; - desc(algorithm aalgorithm, - const memory::desc &diff_src_desc, - const memory::desc &diff_dst_desc, - const memory::dims &strides, - const memory::dims &kernel, - const memory::dims &padding_l, - const memory::dims &padding_r, - const padding_kind apadding_kind) { - memory::validate_dims(strides); - memory::validate_dims(kernel); - memory::validate_dims(padding_l); - memory::validate_dims(padding_r); - error::wrap_c_api(mkldnn_pooling_backward_desc_init( - &data, - convert_to_c(aalgorithm), - &diff_src_desc.data, - &diff_dst_desc.data, - &strides[0], - &kernel[0], - &padding_l[0], - &padding_r[0], - 
mkldnn::convert_to_c(apadding_kind)), - "could not init a backward pooling descriptor"); - } - }; - - struct primitive_desc : public handle { - primitive_desc( - const desc &adesc, - const engine &aengine, - const pooling_forward::primitive_desc &hint_fwd_primitive_desc) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create(&result, - &adesc.data, - aengine.get(), - hint_fwd_primitive_desc.get()), - "could not create a backward pooling primitive descriptor"); - reset(result); - } - - memory::primitive_desc diff_src_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_src_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff src primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - pooling_backward(const primitive_desc &aprimitive_desc, - const primitive::at &diff_dst, - const memory &diff_src) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {diff_dst.data}; - const_mkldnn_primitive_t outputs[] = {diff_src.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a pooling backward primitive"); - reset(result); - } - - pooling_backward(const primitive_desc &aprimitive_desc, - const primitive::at &diff_dst, - const primitive::at &workspace, - const memory &diff_src) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {diff_dst.data, workspace.data}; - const_mkldnn_primitive_t outputs[] = {diff_src.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a pooling backward primitive"); - reset(result); - } -}; - -/// @} - -/// @addtogroup cpp_api_eltwise Eltwise -/// @{ - -struct eltwise_forward : public primitive { - struct desc { - mkldnn_eltwise_desc_t data; - template - desc(prop_kind aprop_kind, - algorithm alg_kind, - const memory::desc &src_desc, - T alpha = 0, - T beta = 0) { - error::wrap_c_api( - mkldnn_eltwise_forward_desc_init(&data, - mkldnn::convert_to_c(aprop_kind), - mkldnn::convert_to_c(alg_kind), - &src_desc.data, - static_cast(alpha), - static_cast(beta)), - "could not create a eltwise forward descriptor"); - } - - /** @deprecated: api backward compatibility for relu */ - template - MKLDNN_DEPRECATED desc(prop_kind aprop_kind, - const memory::desc &src_desc, - T negative_slope) - : desc(aprop_kind, eltwise_relu, src_desc, negative_slope) {} - }; - - struct primitive_desc : public handle { - primitive_desc(const desc &adesc, const engine &aengine) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create( - &result, &adesc.data, aengine.get(), nullptr), - "could not create a eltwise forward primitive descriptor"); - reset(result); - } - - memory::primitive_desc dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - eltwise_forward(const primitive_desc &aprimitive_desc, - const primitive::at 
&src, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data}; - const_mkldnn_primitive_t outputs[] = {dst.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a eltwise forward primitive"); - reset(result); - } -}; - -typedef eltwise_forward relu_forward; - -struct eltwise_backward : public primitive { - struct desc { - mkldnn_eltwise_desc_t data; - - template - desc(algorithm alg_kind, - const memory::desc &diff_data_desc, - const memory::desc &data_desc, - T alpha = 0, - T beta = 0) { - error::wrap_c_api( - mkldnn_eltwise_backward_desc_init(&data, - mkldnn::convert_to_c(alg_kind), - &diff_data_desc.data, - &data_desc.data, - static_cast(alpha), - static_cast(beta)), - "could not create a eltwise backward descriptor"); - } - - /** @deprecated: api backward compatibility for relu */ - template - MKLDNN_DEPRECATED desc(const memory::desc &diff_data_desc, - const memory::desc &data_desc, - T negative_slope) - : desc(eltwise_relu, diff_data_desc, data_desc, negative_slope) {} - }; - - struct primitive_desc : public handle { - primitive_desc( - const desc &adesc, - const engine &aengine, - const eltwise_forward::primitive_desc &hint_fwd_primitive_desc) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create(&result, - &adesc.data, - aengine.get(), - hint_fwd_primitive_desc.get()), - "could not create a eltwise backward primitive descriptor"); - reset(result); - } - - memory::primitive_desc diff_src_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_src_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff src primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - eltwise_backward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &diff_dst, - const memory &diff_src) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, diff_dst.data}; - const_mkldnn_primitive_t outputs[] = {diff_src.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a eltwise backward primitive"); - reset(result); - } -}; - -typedef eltwise_backward relu_backward; - -/// @} - -/// @addtogroup cpp_api_softmax Softmax -/// @{ - -struct softmax_forward : public primitive { - struct desc { - mkldnn_softmax_desc_t data; - desc(prop_kind aprop_kind, - const memory::desc &data_desc, - int softmax_axis) { - error::wrap_c_api( - mkldnn_softmax_forward_desc_init(&data, - mkldnn::convert_to_c(aprop_kind), - &data_desc.data, - softmax_axis), - "could not create a softmax forward descriptor"); - } - }; - - struct primitive_desc : public handle { - primitive_desc(const desc &adesc, const engine &aengine) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create( - &result, &adesc.data, aengine.get(), nullptr), - "could not create a softmax forward primitive descriptor"); - reset(result); - } - - engine get_engine() { return engine::query(*this); } - }; - - softmax_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data}; - 
const_mkldnn_primitive_t outputs[] = {dst.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a softmax forward primitive"); - reset(result); - } -}; - -/// @} - -/// @addtogroup cpp_api_batch_norm Batch normalization -/// @{ - -struct batch_normalization_forward : public primitive { - struct desc { - mkldnn_batch_normalization_desc_t data; - template - desc(prop_kind aprop_kind, - const memory::desc &src_desc, - T epsilon, - unsigned flags) { - error::wrap_c_api( - mkldnn_batch_normalization_forward_desc_init( - &data, - mkldnn::convert_to_c(aprop_kind), - &src_desc.data, - static_cast(epsilon), - flags), - "could not create a batch normalization forward descriptor"); - } - }; - - struct primitive_desc : public handle { - primitive_desc(const desc &adesc, const engine &aengine) { - mkldnn_primitive_desc_t result; - error::wrap_c_api(mkldnn_primitive_desc_create( - &result, &adesc.data, aengine.get(), nullptr), - "could not create a batch normalization forward " - "primitive descriptor"); - reset(result); - } - - primitive_desc(const desc &adesc, - const primitive_attr &aattr, - const engine &aengine) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create_v2( - &result, &adesc.data, aattr.get(), aengine.get(), nullptr), - "could not create a batch normalization forward " - "primitive descriptor"); - reset(result); - } - - memory::primitive_desc weights_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t bndesc; - const_mkldnn_primitive_desc_t const_bndesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&bndesc, const_bndesc), - "could not clone a weights primitive descriptor"); - adesc.reset(bndesc); - return adesc; - } - - memory::primitive_desc mean_primitive_desc() const { - memory::primitive_desc aprimitive_desc; - mkldnn_primitive_desc_t bndesc; - mkldnn_batch_normalization_desc_t *p; - error::wrap_c_api( - mkldnn_primitive_desc_query( - get(), mkldnn::convert_to_c(batch_normalization_d), 0, &p), - "could not get a batch-normalization descriptor"); - const_mkldnn_primitive_desc_t const_bndesc = - (p->flags & use_global_stats) - ? mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(src_pd), 1) - : mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 1); - error::wrap_c_api(mkldnn_primitive_desc_clone(&bndesc, const_bndesc), - "could not clone a mean primitive descriptor"); - aprimitive_desc.reset(bndesc); - return aprimitive_desc; - } - - memory::primitive_desc variance_primitive_desc() const { - memory::primitive_desc aprimitive_desc; - mkldnn_primitive_desc_t bndesc; - mkldnn_batch_normalization_desc_t *p; - error::wrap_c_api( - mkldnn_primitive_desc_query( - get(), mkldnn::convert_to_c(batch_normalization_d), 0, &p), - "could not get a batch-normalization descriptor"); - const_mkldnn_primitive_desc_t const_bndesc = - (p->flags & use_global_stats) - ? 
mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(src_pd), 2) - : mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 2); - error::wrap_c_api(mkldnn_primitive_desc_clone(&bndesc, const_bndesc), - "could not clone a variance primitive descriptor"); - aprimitive_desc.reset(bndesc); - return aprimitive_desc; - } - - memory::primitive_desc workspace_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(workspace_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a workspace primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - batch_normalization_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &mean, - const primitive::at &variance, - const primitive::at &weights, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = { - src.data, mean.data, variance.data, weights.data}; - const_mkldnn_primitive_t outputs[] = {dst.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a batch normalization forward primitive"); - reset(result); - } - - batch_normalization_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &mean, - const primitive::at &variance, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, mean.data, variance.data}; - const_mkldnn_primitive_t outputs[] = {dst.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a batch normalization forward primitive"); - reset(result); - } - - /// @warning batch_normalization_forward has 2 constructors with very - /// similar signatures: - /// - (pd, src, weights, dst, mean, variance) // 2 in, 3 out - /// - (pd, src, dst, mean, variance, workspace) // 1 in, 4 out - /// The only way to distinguish between those is to explicitly - /// cast all input parameters to their type, i.e. to - /// const primitive:at &. 
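// [Editorial sketch, not part of the original header or this diff] To make the
// "2 in, 3 out" overload that follows get selected, a caller can wrap the input
// arguments explicitly in primitive::at, which is the disambiguation the warning
// above asks for. The names bn_pd, src_mem, scale_shift_mem, dst_mem, mean_mem
// and variance_mem are hypothetical and used only for illustration:
//
//   auto bn = mkldnn::batch_normalization_forward(
//       bn_pd,
//       mkldnn::primitive::at(src_mem),          // src (input)
//       mkldnn::primitive::at(scale_shift_mem),  // weights (input)
//       dst_mem, mean_mem, variance_mem);        // outputs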
- batch_normalization_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &weights, - const memory &dst, - const memory &mean, - const memory &variance) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, weights.data}; - const_mkldnn_primitive_t outputs[] = { - dst.get(), mean.get(), variance.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a batch normalization forward primitive"); - reset(result); - } - - batch_normalization_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &weights, - const memory &dst, - const memory &mean, - const memory &variance, - const memory &workspace) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, weights.data}; - const_mkldnn_primitive_t outputs[] = { - dst.get(), mean.get(), variance.get(), workspace.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a batch normalization forward primitive"); - reset(result); - } - - batch_normalization_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const memory &dst, - const memory &mean, - const memory &variance) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data}; - const_mkldnn_primitive_t outputs[] = { - dst.get(), mean.get(), variance.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a batch normalization forward primitive"); - reset(result); - } - - /// @warning batch_normalization_forward has 2 constructors with very - /// similar signatures: - /// - (pd, src, weights, dst, mean, variance) // 2 in, 3 out - /// - (pd, src, dst, mean, variance, workspace) // 1 in, 4 out - /// The only way to distinguish between those is to explicitly - /// cast all input parameters to their type, i.e. to - /// const primitive:at &. - /// @note to make users' experience a little bit better this constructor - /// checks if whether parameters match corresponding primitive - /// descriptor, and if they are not -- call the other (proper) - /// constructor. Yeah, this is still very ugly... - batch_normalization_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const memory &dst, - const memory &mean, - const memory &variance, - const memory &workspace) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[2] = {src.data}; - const_mkldnn_primitive_t outputs[4] = { - dst.get(), mean.get(), variance.get(), workspace.get()}; - - if (1) { // check whether this is the `wrong` constructor - const int n_inputs_expected = mkldnn_primitive_desc_query_s32( - aprimitive_desc.get(), mkldnn_query_num_of_inputs_s32, 0); - const int n_outputs_expected = mkldnn_primitive_desc_query_s32( - aprimitive_desc.get(), mkldnn_query_num_of_outputs_s32, 0); - if (n_inputs_expected == 2 && n_outputs_expected == 3) { - // shift parameters, get rid of workspace, and add weights... 
- auto _weights = dst; - inputs[1] = {_weights.get(), 0}; - - auto _dst = mean, _mean = variance, _variance = workspace; - outputs[0] = _dst.get(); - outputs[1] = _mean.get(); - outputs[2] = _variance.get(); - outputs[3] = nullptr; - } - } - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a batch normalization forward primitive"); - reset(result); - } - - batch_normalization_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &weights, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, weights.data}; - const_mkldnn_primitive_t outputs[] = {dst.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a batch normalization forward primitive"); - reset(result); - } - - batch_normalization_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data}; - const_mkldnn_primitive_t outputs[] = {dst.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a batch normalization forward primitive"); - reset(result); - } -}; - -struct batch_normalization_backward : public primitive { - struct desc { - mkldnn_batch_normalization_desc_t data; - template - desc(prop_kind aprop_kind, - const memory::desc &diff_data_desc, - const memory::desc &data_desc, - T epsilon, - unsigned flags) { - error::wrap_c_api( - mkldnn_batch_normalization_backward_desc_init( - &data, - mkldnn::convert_to_c(aprop_kind), - &diff_data_desc.data, - &data_desc.data, - static_cast(epsilon), - flags), - "could not create a batch normalization backward descriptor"); - } - }; - - struct primitive_desc : public handle { - primitive_desc(const desc &adesc, - const engine &aengine, - const batch_normalization_forward::primitive_desc - &hint_fwd_primitive_desc) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create(&result, - &adesc.data, - aengine.get(), - hint_fwd_primitive_desc.get()), - "could not create a batch normalization backward primitive " - "descriptor"); - reset(result); - } - - memory::primitive_desc weights_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t bndesc; - const_mkldnn_primitive_desc_t const_bndesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&bndesc, const_bndesc), - "could not clone a weights primitive descriptor"); - adesc.reset(bndesc); - return adesc; - } - - memory::primitive_desc diff_weights_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t bndesc; - const_mkldnn_primitive_desc_t const_bndesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_weights_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&bndesc, const_bndesc), - "could not clone a diff_weights primitive descriptor"); - adesc.reset(bndesc); - return adesc; - } - - memory::primitive_desc mean_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t bndesc; - const_mkldnn_primitive_desc_t const_bndesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(src_pd), 1); - error::wrap_c_api(mkldnn_primitive_desc_clone(&bndesc, const_bndesc), - "could not clone a mean primitive descriptor"); - 
adesc.reset(bndesc); - return adesc; - } - - memory::primitive_desc variance_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t bndesc; - const_mkldnn_primitive_desc_t const_bndesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(src_pd), 2); - error::wrap_c_api(mkldnn_primitive_desc_clone(&bndesc, const_bndesc), - "could not clone a variance primitive descriptor"); - adesc.reset(bndesc); - return adesc; - } - - memory::primitive_desc workspace_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(workspace_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a workspace primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - // Prop_kind == backward - batch_normalization_backward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &mean, - const primitive::at &variance, - const primitive::at &diff_dst, - const primitive::at &weights, - const memory &diff_src, - const memory &diff_weights) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = { - src.data, mean.data, variance.data, diff_dst.data, weights.data}; - const_mkldnn_primitive_t outputs[] = {diff_src.get(), diff_weights.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a batch normalization backward primitive"); - reset(result); - } - - // Prop_kind == backward (+ws) - batch_normalization_backward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &mean, - const primitive::at &variance, - const primitive::at &diff_dst, - const primitive::at &weights, - const primitive::at &workspace, - const memory &diff_src, - const memory &diff_weights) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, - mean.data, - variance.data, - diff_dst.data, - weights.data, - workspace.data}; - const_mkldnn_primitive_t outputs[] = {diff_src.get(), diff_weights.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a batch normalization backward primitive"); - reset(result); - } - - // Prop_kind == backward_data (+ws or +weights) - /// @warning This constructor works for backward_data propagation - /// - w/ weights but w/o workspace, or - /// - w/ workspace but w/o weights - batch_normalization_backward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &mean, - const primitive::at &variance, - const primitive::at &diff_dst, - const primitive::at &weights_or_workspace, - const memory &diff_src) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, - mean.data, - variance.data, - diff_dst.data, - weights_or_workspace.data}; - const_mkldnn_primitive_t outputs[] = {diff_src.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - 
&result, aprimitive_desc.get(), inputs, outputs), - "could not create a batch normalization backward primitive"); - reset(result); - } - - // Prop_kind == backward_data - batch_normalization_backward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at &mean, - const primitive::at &variance, - const primitive::at &diff_dst, - const memory &diff_src) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = { - src.data, mean.data, variance.data, diff_dst.data}; - const_mkldnn_primitive_t outputs[] = {diff_src.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a batch normalization backward primitive"); - reset(result); - } -}; - -/// @} - -/// @addtogroup cpp_api_inner_product Inner Product -/// @{ - -struct inner_product_forward : public primitive { - struct desc { - mkldnn_inner_product_desc_t data; - desc(prop_kind aprop_kind, - const memory::desc &src_desc, - const memory::desc &weights_desc, - const memory::desc &bias_desc, - const memory::desc &dst_desc) { - error::wrap_c_api(mkldnn_inner_product_forward_desc_init( - &data, - mkldnn::convert_to_c(aprop_kind), - &src_desc.data, - &weights_desc.data, - &bias_desc.data, - &dst_desc.data), - "could not create a inner product forward descriptor"); - } - - desc(prop_kind aprop_kind, - const memory::desc &src_desc, - const memory::desc &weights_desc, - const memory::desc &dst_desc) { - error::wrap_c_api(mkldnn_inner_product_forward_desc_init( - &data, - mkldnn::convert_to_c(aprop_kind), - &src_desc.data, - &weights_desc.data, - nullptr, - &dst_desc.data), - "could not create a inner product forward descriptor"); - } - }; - - struct primitive_desc : public handle { - primitive_desc(const desc &adesc, const engine &aengine) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create( - &result, &adesc.data, aengine.get(), nullptr), - "could not create a inner product forward primitive descriptor"); - reset(result); - } - - primitive_desc(const desc &adesc, - const primitive_attr &aattr, - const engine &aengine) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create_v2( - &result, &adesc.data, aattr.get(), aengine.get(), nullptr), - "could not create a inner product " - "forward primitive descriptor"); - reset(result); - } - - memory::primitive_desc src_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(src_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a src primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc weights_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a weights primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc bias_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 1); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a 
bias primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - inner_product_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at weights, - const primitive::at &bias, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, weights.data, bias.data}; - const_mkldnn_primitive_t outputs[] = {dst.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a inner product forward primitive"); - reset(result); - } - - inner_product_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at weights, - const memory &dst) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, weights.data}; - const_mkldnn_primitive_t outputs[] = {dst.get()}; - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a inner product forward primitive"); - reset(result); - } -}; - -struct inner_product_backward_data : public primitive { - struct desc { - mkldnn_inner_product_desc_t data; - desc(const memory::desc &diff_src_desc, - const memory::desc &weights_desc, - const memory::desc &diff_dst_desc) { - error::wrap_c_api( - mkldnn_inner_product_backward_data_desc_init(&data, - &diff_src_desc.data, - &weights_desc.data, - &diff_dst_desc.data), - "could not create a inner product backward data descriptor"); - } - }; - - struct primitive_desc : public handle { - primitive_desc( - const desc &adesc, - const engine &aengine, - const inner_product_forward::primitive_desc &hint_fwd_primitive_desc) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create(&result, - &adesc.data, - aengine.get(), - hint_fwd_primitive_desc.get()), - "could not create a inner product backward data primitive " - "descriptor"); - reset(result); - } - - memory::primitive_desc diff_dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff dst primititve descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc weights_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a weights primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_src_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_src_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, 
const_cdesc), - "could not clone a diff src primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - inner_product_backward_data(const primitive_desc &aprimitive_desc, - const primitive::at &diff_dst, - const primitive::at weights, - const memory &diff_src) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {diff_dst.data, weights.data}; - const_mkldnn_primitive_t outputs[] = {diff_src.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a inner product backward data primitive"); - reset(result); - } -}; - -struct inner_product_backward_weights : public primitive { - struct desc { - mkldnn_inner_product_desc_t data; - desc(const memory::desc &src_desc, - const memory::desc &diff_weights_desc, - const memory::desc &diff_bias_desc, - const memory::desc &diff_dst_desc) { - error::wrap_c_api( - mkldnn_inner_product_backward_weights_desc_init( - &data, - &src_desc.data, - &diff_weights_desc.data, - &diff_bias_desc.data, - &diff_dst_desc.data), - "could not create a inner product backward weights descriptor"); - } - desc(const memory::desc &src_desc, - const memory::desc &diff_weights_desc, - const memory::desc &diff_dst_desc) { - error::wrap_c_api( - mkldnn_inner_product_backward_weights_desc_init( - &data, - &src_desc.data, - &diff_weights_desc.data, - nullptr, - &diff_dst_desc.data), - "could not create a inner product backward weights descriptor"); - } - }; - - struct primitive_desc : public handle { - primitive_desc( - const desc &adesc, - const engine &aengine, - const inner_product_forward::primitive_desc &hint_fwd_primitive_desc) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create(&result, - &adesc.data, - aengine.get(), - hint_fwd_primitive_desc.get()), - "could not create a inner product backward weights primitive " - "descriptor"); - reset(result); - } - - memory::primitive_desc diff_dst_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_dst_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff dst primititve descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_weights_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_weights_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff weights primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_bias_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_weights_pd), 1); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a diff bias primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc src_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(src_pd), 0); - 
error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a src primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - inner_product_backward_weights(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at diff_dst, - const memory &diff_weights) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, diff_dst.data}; - const_mkldnn_primitive_t outputs[] = {diff_weights.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a inner product backward weights primitive"); - reset(result); - } - - inner_product_backward_weights(const primitive_desc &aprimitive_desc, - const primitive::at &src, - const primitive::at diff_dst, - const memory &diff_weights, - const memory &diff_bias) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[] = {src.data, diff_dst.data}; - const_mkldnn_primitive_t outputs[] = {diff_weights.get(), diff_bias.get()}; - error::wrap_c_api( - mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create a inner product backward weights primitive"); - reset(result); - } -}; - -/// @} - -/// @addtogroup cpp_api_rnn RNN -/// @{ - -struct rnn_cell { - struct desc { - mkldnn_rnn_cell_desc_t c_rnn_cell_; - - desc(algorithm kind, algorithm activation_f) { - error::wrap_c_api( - mkldnn_rnn_cell_desc_init(&c_rnn_cell_, - mkldnn::convert_to_c(kind), - mkldnn::convert_to_c(activation_f), - 0U, - 0, - 0), - "could not init an rnn cell descriptor"); - } - desc(algorithm kind) : desc(kind, algorithm::algorithm_undef) {} - - operator const mkldnn_rnn_cell_desc_t *() const { return &c_rnn_cell_; } - - algorithm get_cell_kind() const { return algorithm(c_rnn_cell_.cell_kind); } - algorithm get_activation() const { - return algorithm(c_rnn_cell_.activation_kind); - } - - float get_alpha() const { return c_rnn_cell_.alpha; } - void set_alpha(float alpha) { - c_rnn_cell_.flags |= mkldnn_rnn_cell_with_relu; - c_rnn_cell_.alpha = alpha; - } - - float get_clipping() const { return c_rnn_cell_.clipping; } - void set_clipping(float clipping) { - c_rnn_cell_.flags |= mkldnn_rnn_cell_with_clipping; - c_rnn_cell_.clipping = clipping; - } - - int get_gates_count() const { - return mkldnn_rnn_cell_get_gates_count(&c_rnn_cell_); - } - int get_state_count() const { - return mkldnn_rnn_cell_get_states_count(&c_rnn_cell_); - } - }; -}; - -struct rnn_forward : public primitive { - struct desc { - mkldnn_rnn_desc_t data; - desc(prop_kind aprop_kind, - rnn_cell::desc cell, - const rnn_direction direction, - const memory::desc &src_layer_desc, - const memory::desc &src_iter_desc, - const memory::desc &weights_layer_desc, - const memory::desc &weights_iter_desc, - const memory::desc &bias_desc, - const memory::desc &dst_layer_desc, - const memory::desc &dst_iter_desc) { - error::wrap_c_api( - mkldnn_rnn_forward_desc_init(&data, - mkldnn::convert_to_c(aprop_kind), - cell, - mkldnn::convert_to_c(direction), - &src_layer_desc.data, - &src_iter_desc.data, - &weights_layer_desc.data, - &weights_iter_desc.data, - &bias_desc.data, - &dst_layer_desc.data, - &dst_iter_desc.data), - "could not create an RNN forward descriptor"); - } - }; - struct primitive_desc : public handle { - primitive_desc(const desc &adesc, const engine &aengine) { - mkldnn_primitive_desc_t result; - error::wrap_c_api(mkldnn_primitive_desc_create( - &result, &adesc.data, 
aengine.get(), nullptr), - "could not create an RNN forward primitive descriptor"); - reset(result); - } - - memory::primitive_desc src_layer_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(src_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone an src layer primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc src_iter_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(src_pd), 1); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a src iter primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc weights_layer_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a weights primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc weights_src_iter_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 1); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a weights primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc bias_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 2); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a bias primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc workspace_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t ldesc; - const_mkldnn_primitive_desc_t const_ldesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(workspace_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&ldesc, const_ldesc), - "could not clone a workspace primitive descriptor"); - adesc.reset(ldesc); - return adesc; - } - - memory::primitive_desc dst_layer_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 0); - error::wrap_c_api( - mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst last layer primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc dst_iter_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 1); - error::wrap_c_api( - mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst last iteration primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - - 
rnn_forward(const primitive_desc &aprimitive_desc, - const primitive::at &src_layer, - const primitive::at &src_iter, - const primitive::at &weights_layer, - const primitive::at &weights_iter, - const primitive::at &bias, - const memory &dst_layer, - const memory &dst_iter, - const memory &workspace) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[5]; - const_mkldnn_primitive_t outputs[3]; - int idx = 0; - inputs[idx++] = src_layer.data; - if (!is_null_memory(src_iter.data.primitive)) inputs[idx++] = src_iter.data; - inputs[idx++] = weights_layer.data; - inputs[idx++] = weights_iter.data; - if (!is_null_memory(bias.data.primitive)) inputs[idx++] = bias.data; - - idx = 0; - outputs[idx++] = dst_layer.get(); - if (!is_null_memory(dst_iter.get())) outputs[idx++] = dst_iter.get(); - if (!is_null_memory(workspace.get())) outputs[idx++] = workspace.get(); - - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create an RNN forward primitive"); - reset(result); - } -}; - -struct rnn_backward : public primitive { - struct desc { - mkldnn_rnn_desc_t data; - desc(prop_kind aprop_kind, - rnn_cell::desc cell, - const rnn_direction direction, - const memory::desc &src_layer_desc, - const memory::desc &src_iter_desc, - const memory::desc &weights_layer_desc, - const memory::desc &weights_iter_desc, - const memory::desc &bias_desc, - const memory::desc &dst_layer_desc, - const memory::desc &dst_iter_desc, - const memory::desc &diff_src_layer_desc, - const memory::desc &diff_src_iter_desc, - const memory::desc &diff_weights_layer_desc, - const memory::desc &diff_weights_iter_desc, - const memory::desc &diff_bias_desc, - const memory::desc &diff_dst_layer_desc, - const memory::desc &diff_dst_iter_desc) { - error::wrap_c_api( - mkldnn_rnn_backward_desc_init(&data, - mkldnn::convert_to_c(aprop_kind), - cell, - mkldnn::convert_to_c(direction), - &src_layer_desc.data, - &src_iter_desc.data, - &weights_layer_desc.data, - &weights_iter_desc.data, - &bias_desc.data, - &dst_layer_desc.data, - &dst_iter_desc.data, - &diff_src_layer_desc.data, - &diff_src_iter_desc.data, - &diff_weights_layer_desc.data, - &diff_weights_iter_desc.data, - &diff_bias_desc.data, - &diff_dst_layer_desc.data, - &diff_dst_iter_desc.data), - "could not create an RNN backward descriptor"); - } - }; - struct primitive_desc : public handle { - primitive_desc(const desc &adesc, const engine &aengine) { - mkldnn_primitive_desc_t result; - error::wrap_c_api( - mkldnn_primitive_desc_create( - &result, &adesc.data, aengine.get(), nullptr), - "could not create an RNN backward primitive descriptor"); - reset(result); - } - - memory::primitive_desc src_layer_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(src_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone an src layer primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc src_iter_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(src_pd), 1); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a src iter primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc 
weights_layer_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a weights primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc weights_iter_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 1); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a weights primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc bias_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(weights_pd), 2); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a bias primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc dst_layer_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 0); - error::wrap_c_api( - mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst last layer primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc dst_iter_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(dst_pd), 1); - error::wrap_c_api( - mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst last iteration primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_src_layer_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_src_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone an src_layer primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_src_iter_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_src_pd), 1); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a src iter primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_weights_layer_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_weights_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a weights primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_weights_iter_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - 
mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_weights_pd), 1); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a weights primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_bias_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_weights_pd), 2); - error::wrap_c_api(mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a bias primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_dst_layer_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_dst_pd), 0); - error::wrap_c_api( - mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst last layer primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc diff_dst_iter_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; - const_mkldnn_primitive_desc_t const_cdesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(diff_dst_pd), 1); - error::wrap_c_api( - mkldnn_primitive_desc_clone(&cdesc, const_cdesc), - "could not clone a dst last iteration primitive descriptor"); - adesc.reset(cdesc); - return adesc; - } - - memory::primitive_desc workspace_primitive_desc() const { - memory::primitive_desc adesc; - mkldnn_primitive_desc_t ldesc; - const_mkldnn_primitive_desc_t const_ldesc = - mkldnn_primitive_desc_query_pd( - get(), mkldnn::convert_to_c(workspace_pd), 0); - error::wrap_c_api(mkldnn_primitive_desc_clone(&ldesc, const_ldesc), - "could not clone a workspace primitive descriptor"); - adesc.reset(ldesc); - return adesc; - } - - engine get_engine() { return engine::query(*this); } - }; - // With last iteration (with and without input src_iter) - rnn_backward(const primitive_desc &aprimitive_desc, - const primitive::at &src_layer, - const primitive::at &src_iter, - const primitive::at &weights_layer, - const primitive::at &weights_iter, - const primitive::at &bias, - const primitive::at &dst_layer, - const primitive::at &dst_iter, - const memory &diff_src_layer, - const memory &diff_src_iter, - const memory &diff_weights_layer, - const memory &diff_weights_iter, - const memory &diff_bias, - const primitive::at &diff_dst_layer, - const primitive::at &diff_dst_iter, - const primitive::at &workspace) { - mkldnn_primitive_t result; - mkldnn_primitive_at_t inputs[10]; - const_mkldnn_primitive_t outputs[5]; - int idx = 0; - inputs[idx] = src_layer.data; - if (!is_null_memory(src_iter.data.primitive)) inputs[idx++] = src_iter.data; - inputs[idx++] = weights_layer.data; - inputs[idx++] = weights_iter.data; - if (!is_null_memory(bias.data.primitive)) inputs[idx++] = bias.data; - inputs[idx] = dst_layer.data; - if (!is_null_memory(dst_iter.data.primitive)) inputs[idx++] = dst_iter.data; - inputs[idx] = diff_dst_layer.data; - if (!is_null_memory(diff_dst_iter.data.primitive)) - inputs[idx++] = diff_dst_iter.data; - inputs[idx] = workspace.data; - - idx = 0; - outputs[idx] = diff_src_layer.get(); - if (!is_null_memory(diff_src_iter.get())) - outputs[idx++] = diff_src_iter.get(); - outputs[idx] = diff_weights_layer.get(); - outputs[idx] = diff_weights_iter.get(); - if 
(!is_null_memory(diff_bias.get())) outputs[idx] = diff_bias.get(); - error::wrap_c_api(mkldnn_primitive_create( - &result, aprimitive_desc.get(), inputs, outputs), - "could not create an RNN backward primitive"); - reset(result); - } -}; - -/// @} -/// @} Primitives - -/// @addtogroup cpp_api_stream Stream -/// @{ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -template <> -struct handle_traits { - static constexpr auto destructor = &mkldnn_stream_destroy; -}; -#endif - -struct stream : public handle { - using handle::handle; - - enum kind { - any = mkldnn_stream_kind_t::mkldnn_any_stream, - eager = mkldnn_stream_kind_t::mkldnn_eager, - lazy = mkldnn_stream_kind_t::mkldnn_lazy - }; - - static mkldnn_stream_kind_t convert_to_c(kind akind) { - return static_cast(akind); - } - /// Constructs a stream. - stream(kind akind) { - mkldnn_stream_t astream; - error::wrap_c_api(mkldnn_stream_create(&astream, convert_to_c(akind)), - "could not create a stream"); - reset(astream); - } - - /// Submits a vector of primitives to a stream for computations. - /// - /// @param primitives The vector of primitives to submit. - /// @returns The stream. - stream &submit(std::vector primitives) { - // TODO: find a proper way to convert vector to - // vector - if (primitives.size() == 0) return *this; - std::vector c_api_primitives; - c_api_primitives.reserve(primitives.size()); - auto convert_to_c = [](primitive p) { return p.get(); }; - std::transform(primitives.begin(), - primitives.end(), - std::back_inserter(c_api_primitives), - convert_to_c); - - mkldnn_primitive_t c_api_error_primitive; - error::wrap_c_api(mkldnn_stream_submit(get(), - c_api_primitives.size(), - &c_api_primitives[0], - &c_api_error_primitive), - "could not submit primitives to a stream", - &c_api_error_primitive); - - return *this; - } - - /// Waits for all computations submitted to the stream to complete. - /// - /// @param block Specifies whether the operation should wait indefinitely or - /// return - /// immediately. - /// @returns @c true if all computations completed. - /// @returns @c false if not all computations completed. 
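// [Editorial sketch, not part of the original header or this diff] A minimal
// usage pattern for the submit()/wait() API documented above, assuming a
// vector `net` has already been filled with constructed primitives (their
// setup is omitted here):
//
//   std::vector<mkldnn::primitive> net;
//   // ... push forward/backward primitives into net ...
//   mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait();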
- bool wait(bool block = true) { - mkldnn_primitive_t c_api_error_primitive; - mkldnn_status_t status = - mkldnn_stream_wait(get(), block, &c_api_error_primitive); - if (status != mkldnn_success && status != mkldnn_try_again) - error::wrap_c_api( - status, "could not wait on a stream", &c_api_error_primitive); - return (status == mkldnn_success); - } - - stream &rerun() { - mkldnn_primitive_t c_api_error_primitive; - error::wrap_c_api(mkldnn_stream_rerun(get(), &c_api_error_primitive), - "could not rerun a stream", - &c_api_error_primitive); - return *this; - } -}; - -/// @} - -/// @} C++ API - -} // namespace mkldnn - -#endif diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 38c765938fe9d7b2103bfdd926874c485d0ff4dc..161ea55586bbb6bde2cbb0084bb67b184f91460e 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -72,6 +72,8 @@ def convert_np_dtype_to_dtype_(np_dtype): return core.VarDesc.VarType.INT64 elif dtype == np.bool: return core.VarDesc.VarType.BOOL + elif dtype == np.uint8: + return core.VarDesc.VarType.UINT8 else: raise ValueError("Not supported numpy dtype " + str(dtype)) diff --git a/python/paddle/fluid/inferencer.py b/python/paddle/fluid/inferencer.py index 56c008d1af70f4b5f6169ebe5174b08fcf8bc722..894f6dbfadcaf532556c439daf2c3b4ca24ffeb4 100644 --- a/python/paddle/fluid/inferencer.py +++ b/python/paddle/fluid/inferencer.py @@ -12,11 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import contextlib + import core import executor import framework import io +import parallel_executor import unique_name from trainer import check_and_get_place @@ -24,40 +27,53 @@ __all__ = ['Inferencer', ] class Inferencer(object): - def __init__(self, infer_func, param_path, place=None): + def __init__(self, infer_func, param_path, place=None, parallel=False): """ :param infer_func: a function that will return predict Variable :param param_path: the path where the inference model is saved by fluid.io.save_params :param place: place to do the inference + :param parallel: use parallel_executor to run the inference, it will use multi CPU/GPU. 
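As a hedged usage sketch of the reworked Inferencer API above (the inference_program function, the "model_dir" path, and the 13-feature 'x' input are placeholders borrowed from the fit_a_line example later in this patch), constructed with the new parallel flag:

    import numpy
    import paddle.fluid as fluid

    # inference_program builds the network and returns the predict Variable;
    # "model_dir" holds parameters saved earlier with trainer.save_params().
    inferencer = fluid.Inferencer(
        infer_func=inference_program,
        param_path="model_dir",
        place=fluid.CPUPlace(),
        parallel=True)  # run through ParallelExecutor on multiple CPU cores/GPUs

    # infer() takes a {input_name: numpy array} map and returns the fetched
    # values of the predict variable.
    tensor_x = numpy.random.uniform(0, 10, [10, 13]).astype("float32")
    results = inferencer.infer({'x': tensor_x})
    print("infer results: ", numpy.array(results[0]))
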
""" self.param_path = param_path self.scope = core.Scope() + self.parallel = parallel + self.place = check_and_get_place(place) self.inference_program = framework.Program() with framework.program_guard(self.inference_program): with unique_name.guard(): self.predict_var = infer_func() - self.exe = executor.Executor(check_and_get_place(place)) - with executor.scope_guard(self.scope): + with self._prog_and_scope_guard(): # load params from param_path into scope - io.load_params(self.exe, param_path, self.inference_program) + io.load_params(executor.Executor(self.place), param_path) + + if parallel: + with self._prog_and_scope_guard(): + self.exe = parallel_executor.ParallelExecutor( + use_cuda=isinstance(self.place, core.CUDAPlace), + loss_name=self.predict_var.name) + else: + self.exe = executor.Executor(self.place) - def infer(self, inputs, return_numpy=True): + def infer(self, inputs): """ :param inputs: a map of {"input_name": input_var} that will be feed into the inference program to get the predict value - :param return_numpy: if return numpy value for row tensor :return: the predict value of the inference model """ if not isinstance(inputs, dict): raise ValueError( "inputs should be a map of {'input_name': input_var}") - with executor.scope_guard(self.scope): - results = self.exe.run(self.inference_program, - feed=inputs, - fetch_list=[self.predict_var], - return_numpy=return_numpy) + with self._prog_and_scope_guard(): + results = self.exe.run(feed=inputs, + fetch_list=[self.predict_var.name]) return results + + @contextlib.contextmanager + def _prog_and_scope_guard(self): + with framework.program_guard(main_program=self.inference_program): + with executor.scope_guard(self.scope): + yield diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 4b707973e27391a6bdcba138934f62a255e04bb2..dee41448081cbfcd8224ce2abbf3ba7b7b97eb7c 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -49,6 +49,7 @@ __all__ = [ 'reorder_lod_tensor_by_rank', 'ParallelDo', 'Print', + 'is_empty', ] @@ -1562,3 +1563,40 @@ def reorder_lod_tensor_by_rank(x, rank_table): 'RankTable': [rank_table]}, outputs={'Out': [out]}) return out + + +def is_empty(x, cond=None, **ignored): + """ + **Is Empty** + + This layer returns the truth value of whether the variable is empty. + + Args: + x(Variable): Operand of *is_empty* + cond(Variable|None): Optional output variable to store the result + of *is_empty* + + Returns: + Variable: The tensor variable storing the output of *is_empty*. + + Raises: + TypeError: If input cond is not a variable, or cond's dtype is + not bool + + Examples: + .. 
code-block:: python + + less = fluid.layers.is_empty(x=input) + """ + helper = LayerHelper("is_empty", **locals()) + if cond is None: + cond = helper.create_tmp_variable(dtype='bool') + cond.stop_gradient = True + elif not isinstance(cond, Variable): + raise TypeError("cond takes a variable") + elif cond.dtype != 'bool': + raise TypeError("The data type of cond must be bool") + + helper.append_op( + type='is_empty', inputs={'X': [x]}, outputs={'Out': [cond]}) + return cond diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index a5938fe494265778ef7032c56a8d6d35acd729c5..b33adf55cf1ded9795043e108f5814d3fc0e3ded 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -23,6 +23,7 @@ import nn import math __all__ = [ + 'prior_box', 'multi_box_head', 'bipartite_match', 'target_assign', @@ -564,6 +565,98 @@ def ssd_loss(location, return loss +def prior_box(input, + image, + min_sizes, + max_sizes=None, + aspect_ratios=None, + variance=[0.1, 0.1, 0.2, 0.2], + flip=False, + clip=False, + steps=[0.0, 0.0], + offset=0.5, + name=None): + """ + **Prior box operator** + + Generate prior boxes for SSD(Single Shot MultiBox Detector) algorithm. + Each position of the input produce N prior boxes, N is determined by + the count of min_sizes, max_sizes and aspect_ratios, The size of the + box is in range(min_size, max_size) interval, which is generated in + sequence according to the aspect_ratios. + + Args: + input(Variable): The Input Variables, the format is NCHW. + image(Variable): The input image data of PriorBoxOp, + the layout is NCHW. + min_sizes(list|tuple): min sizes of generated prior boxes. + max_sizes(list|tuple|None): max sizes of generated prior boxes. + Default: None. + aspect_ratios(list|tuple): the aspect ratios of generated prior + boxes. Default: None. + variance(list|tuple): the variances to be encoded in prior boxes. + Default:[0.1, 0.1, 0.2, 0.2]. + flip(bool): Whether to flip aspect ratios. Default:False. + clip(bool): Whether to clip out-of-boundary boxes. Default: False. + step(list|turple): Prior boxes step across weight and height, If + step[0] == 0.0/step[1] == 0.0, the prior boxes step across + height/weight of the input will be automatically calculated. + Default: [0.0] + offset(float): Prior boxes center offset. Default: 0.5 + name(str): Name of the prior box op. Default: None. + + Returns: + boxes(Variable): the output prior boxes of PriorBox. + The layout is [H, W, num_priors, 4]. + H is the height of input, W is the width of input, + num_priors is the total + box count of each position of input. + Variances(Variable): the expanded variances of PriorBox. + The layout is [H, W, num_priors, 4]. + H is the height of input, W is the width of input + num_priors is the total + box count of each position of input + + + Examples: + .. 
code-block:: python + box, var = prior_box( + input=conv1, + image=images, + min_sizes=[100.], + flip=True, + clip=True) + """ + helper = LayerHelper("prior_box", **locals()) + dtype = helper.input_dtype() + + attrs = { + 'min_sizes': min_sizes, + 'aspect_ratios': aspect_ratios, + 'variances': variance, + 'flip': flip, + 'clip': clip, + 'step_w': steps[0], + 'step_h': steps[1], + 'offset': offset + } + if max_sizes is not None and len(max_sizes) > 0 and max_sizes[0] > 0: + attrs['max_sizes'] = max_sizes + + box = helper.create_tmp_variable(dtype) + var = helper.create_tmp_variable(dtype) + helper.append_op( + type="prior_box", + inputs={"Input": input, + "Image": image}, + outputs={"Boxes": box, + "Variances": var}, + attrs=attrs, ) + box.stop_gradient = True + var.stop_gradient = True + return box, var + + def multi_box_head(inputs, image, base_size, @@ -660,47 +753,6 @@ def multi_box_head(inputs, clip=True) """ - def _prior_box_(input, - image, - min_sizes, - max_sizes, - aspect_ratios, - variance, - flip=False, - clip=False, - step_w=0.0, - step_h=0.0, - offset=0.5, - name=None): - helper = LayerHelper("prior_box", **locals()) - dtype = helper.input_dtype() - - attrs = { - 'min_sizes': min_sizes, - 'aspect_ratios': aspect_ratios, - 'variances': variance, - 'flip': flip, - 'clip': clip, - 'step_w': step_w, - 'step_h': step_h, - 'offset': offset - } - if len(max_sizes) > 0 and max_sizes[0] > 0: - attrs['max_sizes'] = max_sizes - - box = helper.create_tmp_variable(dtype) - var = helper.create_tmp_variable(dtype) - helper.append_op( - type="prior_box", - inputs={"Input": input, - "Image": image}, - outputs={"Boxes": box, - "Variances": var}, - attrs=attrs, ) - box.stop_gradient = True - var.stop_gradient = True - return box, var - def _reshape_with_axis_(input, axis=1): if not (axis > 0 and axis < len(input.shape)): raise ValueError("The axis should be smaller than " @@ -777,11 +829,10 @@ def multi_box_head(inputs, aspect_ratio = aspect_ratios[i] if not _is_list_or_tuple_(aspect_ratio): aspect_ratio = [aspect_ratio] + step = [step_w[i] if step_w else 0.0, step_h[i] if step_w else 0.0] - box, var = _prior_box_(input, image, min_size, max_size, aspect_ratio, - variance, flip, clip, step_w[i] - if step_w else 0.0, step_h[i] - if step_w else 0.0, offset) + box, var = prior_box(input, image, min_size, max_size, aspect_ratio, + variance, flip, clip, step, offset) box_results.append(box) var_results.append(var) diff --git a/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt index c2a15bdb3b17b65fe861dd429f548074c13e2f09..da76747f82d1ab51af07c2e942d1ea893e149b7e 100644 --- a/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt +++ b/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt @@ -8,3 +8,4 @@ endforeach() add_subdirectory(fit_a_line) add_subdirectory(recognize_digits) +add_subdirectory(image_classification) diff --git a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py index fbcf2a282f6421a546723a1d429c59fb304a0cc2..4c8505acf322a8ee33799c009b523cd70bd01db3 100644 --- a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py @@ -57,22 +57,20 @@ def train(use_cuda, train_program, save_dirname): optimizer=fluid.optimizer.SGD(learning_rate=0.001)) def event_handler(event): - if isinstance(event, 
fluid.EndEpochEvent): - test_metrics = trainer.test( - reader=test_reader, feed_order=['x', 'y']) - print test_metrics - ''' - - ... - ['25.768919467926025'] - ['15.343549569447836'] - ... - - ''' - if float(test_metrics[0]) < 20.0: + if isinstance(event, fluid.EndStepEvent): + if event.step == 10: + test_metrics = trainer.test( + reader=test_reader, feed_order=['x', 'y']) + print test_metrics + ''' + ... + ['25.768919467926025'] + ['15.343549569447836'] + ... + ''' if save_dirname is not None: trainer.save_params(save_dirname) - return + trainer.stop() trainer.train( reader=train_reader, @@ -94,7 +92,7 @@ def infer(use_cuda, inference_program, save_dirname=None): tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32") results = inferencer.infer({'x': tensor_x}) - print("infer results: ", results[0]) + print("infer results: ", numpy.array(results[0])) def main(use_cuda): diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/image_classification/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..673c965b662a022739f8d489c331f4de9455a926 --- /dev/null +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/CMakeLists.txt @@ -0,0 +1,7 @@ +file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") + +# default test +foreach(src ${TEST_OPS}) + py_test(${src} SRCS ${src}.py) +endforeach() diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/cifar10_small_test_set.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/cifar10_small_test_set.py new file mode 100644 index 0000000000000000000000000000000000000000..7fed6d914f75b690e34411aa154359c93b6ca989 --- /dev/null +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/cifar10_small_test_set.py @@ -0,0 +1,82 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +CIFAR dataset. + +This module will download dataset from +https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into +paddle reader creators. + +The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, +with 6000 images per class. There are 50000 training images and 10000 test +images. + +The CIFAR-100 dataset is just like the CIFAR-10, except it has 100 classes +containing 600 images each. There are 500 training images and 100 testing +images per class. 
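A hedged sketch of how the cifar10_small_test_set helper defined just below is meant to be consumed, mirroring the image-classification tests later in this patch; train10(batch_size=N) caps the reader at roughly N samples so CI stays fast:

    import paddle
    import cifar10_small_test_set

    # cap the reader at ~10 samples so CI does not stream all 50000 training images
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            cifar10_small_test_set.train10(batch_size=10), buf_size=128 * 10),
        batch_size=128)

    for data in train_reader():
        # each sample is (pixels scaled to [0, 1] as float32, label in [0, 9])
        pass
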
+ +""" + +import cPickle +import itertools +import numpy +import paddle.v2.dataset.common +import tarfile + +__all__ = ['train10'] + +URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/' +CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz' +CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a' + + +def reader_creator(filename, sub_name, batch_size=None): + def read_batch(batch): + data = batch['data'] + labels = batch.get('labels', batch.get('fine_labels', None)) + assert labels is not None + for sample, label in itertools.izip(data, labels): + yield (sample / 255.0).astype(numpy.float32), int(label) + + def reader(): + with tarfile.open(filename, mode='r') as f: + names = (each_item.name for each_item in f + if sub_name in each_item.name) + + batch_count = 0 + for name in names: + batch = cPickle.load(f.extractfile(name)) + for item in read_batch(batch): + if isinstance(batch_size, int) and batch_count > batch_size: + break + batch_count += 1 + yield item + + return reader + + +def train10(batch_size=None): + """ + CIFAR-10 training set creator. + + It returns a reader creator, each sample in the reader is image pixels in + [0, 1] and label in [0, 9]. + + :return: Training reader creator + :rtype: callable + """ + return reader_creator( + paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), + 'data_batch', + batch_size=batch_size) diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/notest_image_classification_resnet.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py similarity index 77% rename from python/paddle/fluid/tests/book/high-level-api/image_classification/notest_image_classification_resnet.py rename to python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py index 17db38797cf19ae387f69f66daa42fc78cfcb7d5..1160e500dbd6db784eeb81b72968386347fec59a 100644 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/notest_image_classification_resnet.py +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py @@ -17,6 +17,7 @@ from __future__ import print_function import paddle import paddle.fluid as fluid import numpy +import cifar10_small_test_set def resnet_cifar10(input, depth=32): @@ -81,46 +82,50 @@ def train_network(): cost = fluid.layers.cross_entropy(input=predict, label=label) avg_cost = fluid.layers.mean(cost) accuracy = fluid.layers.accuracy(input=predict, label=label) - return avg_cost, accuracy + return [avg_cost, accuracy] -def train(use_cuda, save_path): +def train(use_cuda, train_program, save_dirname): BATCH_SIZE = 128 EPOCH_NUM = 1 train_reader = paddle.batch( paddle.reader.shuffle( - paddle.dataset.cifar.train10(), buf_size=128 * 10), + cifar10_small_test_set.train10(batch_size=10), buf_size=128 * 10), batch_size=BATCH_SIZE) test_reader = paddle.batch( paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE) def event_handler(event): - if isinstance(event, fluid.EndIteration): - if (event.batch_id % 10) == 0: - avg_cost, accuracy = trainer.test(reader=test_reader) + if isinstance(event, fluid.EndStepEvent): + avg_cost, accuracy = trainer.test( + reader=test_reader, feed_order=['pixel', 'label']) - print('BatchID {1:04}, Loss {2:2.2}, Acc {3:2.2}'.format( - event.batch_id + 1, avg_cost, accuracy)) + print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy)) - if accuracy > 0.01: # Low threshold for speeding up CI - trainer.params.save(save_path) - return + if 
accuracy > 0.01: # Low threshold for speeding up CI + if save_dirname is not None: + trainer.save_params(save_dirname) + return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() trainer = fluid.Trainer( - train_network, + train_func=train_program, optimizer=fluid.optimizer.Adam(learning_rate=0.001), - place=place, - event_handler=event_handler) - trainer.train(train_reader, EPOCH_NUM, event_handler=event_handler) + place=place) + trainer.train( + reader=train_reader, + num_epochs=EPOCH_NUM, + event_handler=event_handler, + feed_order=['pixel', 'label']) -def infer(use_cuda, save_path): - params = fluid.Params(save_path) + +def infer(use_cuda, inference_program, save_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer(inference_network, params, place=place) + inferencer = fluid.Inferencer( + infer_func=inference_program, param_path=save_dirname, place=place) # The input's dimension of conv should be 4-D or 5-D. # Use normilized image pixels as input data, which should be in the range @@ -135,8 +140,14 @@ def main(use_cuda): if use_cuda and not fluid.core.is_compiled_with_cuda(): return save_path = "image_classification_resnet.inference.model" - train(use_cuda, save_path) - infer(use_cuda, save_path) + + train( + use_cuda=use_cuda, train_program=train_network, save_dirname=save_path) + + infer( + use_cuda=use_cuda, + inference_program=inference_network, + save_dirname=save_path) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/notest_image_classification_vgg.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py similarity index 72% rename from python/paddle/fluid/tests/book/high-level-api/image_classification/notest_image_classification_vgg.py rename to python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py index e83afeed2f72635a40aa2ac21dc0c8611c309de4..1e3e955ba0299f2cc0fcc02d79ae6fd8ff4c1171 100644 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/notest_image_classification_vgg.py +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py @@ -17,6 +17,7 @@ from __future__ import print_function import paddle import paddle.fluid as fluid import numpy +import cifar10_small_test_set def vgg16_bn_drop(input): @@ -60,46 +61,48 @@ def train_network(): cost = fluid.layers.cross_entropy(input=predict, label=label) avg_cost = fluid.layers.mean(cost) accuracy = fluid.layers.accuracy(input=predict, label=label) - return avg_cost, accuracy + return [avg_cost, accuracy] -def train(use_cuda, save_path): +def train(use_cuda, train_program, save_dirname): BATCH_SIZE = 128 - EPOCH_NUM = 1 - train_reader = paddle.batch( paddle.reader.shuffle( - paddle.dataset.cifar.train10(), buf_size=128 * 10), + cifar10_small_test_set.train10(batch_size=10), buf_size=128 * 10), batch_size=BATCH_SIZE) test_reader = paddle.batch( paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE) def event_handler(event): - if isinstance(event, fluid.EndIteration): - if (event.batch_id % 10) == 0: - avg_cost, accuracy = trainer.test(reader=test_reader) + if isinstance(event, fluid.EndStepEvent): + avg_cost, accuracy = trainer.test( + reader=test_reader, feed_order=['pixel', 'label']) - print('BatchID {1:04}, Loss {2:2.2}, Acc {3:2.2}'.format( - event.batch_id + 1, avg_cost, accuracy)) + print('Loss {0:2.2}, Acc {1:2.2}'.format(avg_cost, accuracy)) - 
if accuracy > 0.01: # Low threshold for speeding up CI - trainer.params.save(save_path) - return + if accuracy > 0.01: # Low threshold for speeding up CI + if save_dirname is not None: + trainer.save_params(save_dirname) + return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() trainer = fluid.Trainer( - train_network, - optimizer=fluid.optimizer.Adam(learning_rate=0.001), + train_func=train_program, place=place, - event_handler=event_handler) - trainer.train(train_reader, EPOCH_NUM, event_handler=event_handler) + optimizer=fluid.optimizer.Adam(learning_rate=0.001)) + + trainer.train( + reader=train_reader, + num_epochs=1, + event_handler=event_handler, + feed_order=['pixel', 'label']) -def infer(use_cuda, save_path): - params = fluid.Params(save_path) +def infer(use_cuda, inference_program, save_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer(inference_network, params, place=place) + inferencer = fluid.Inferencer( + infer_func=inference_program, param_path=save_dirname, place=place) # The input's dimension of conv should be 4-D or 5-D. # Use normilized image pixels as input data, which should be in the range @@ -114,8 +117,14 @@ def main(use_cuda): if use_cuda and not fluid.core.is_compiled_with_cuda(): return save_path = "image_classification_vgg.inference.model" - train(use_cuda, save_path) - infer(use_cuda, save_path) + + train( + use_cuda=use_cuda, train_program=train_network, save_dirname=save_path) + + infer( + use_cuda=use_cuda, + inference_program=inference_network, + save_dirname=save_path) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py index 2aac70463c64019ec97b0c3893b4b52f77967797..2128d4c5b87434ebe30930dc0e338b3b50d921c2 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py @@ -112,7 +112,7 @@ def infer(use_cuda, inference_program, save_dirname=None): results = inferencer.infer({'img': tensor_img}) - print("infer results: ", results[0]) + print("infer results: ", numpy.array(results[0])) def main(use_cuda): diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py index 32653157994f81c46f420c1b55ceddbbbf06f2fe..041c8d778e5c03aa68dad6ef450934f09c8d2a52 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py @@ -93,7 +93,7 @@ def infer(use_cuda, inference_program, save_dirname=None): results = inferencer.infer({'img': tensor_img}) - print("infer results: ", results[0]) + print("infer results: ", numpy.array(results[0])) def main(use_cuda): diff --git a/python/paddle/fluid/tests/book/high-level-api/word2vec/no_test_word2vec_new_api.py b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py similarity index 80% rename from python/paddle/fluid/tests/book/high-level-api/word2vec/no_test_word2vec_new_api.py rename to python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py index 
4f861e5aaeca7ce0f73450c09f9ddc1ed7417469..bf86cd9acf8da940fcc2fb5b594e33f9b6965acb 100644 --- a/python/paddle/fluid/tests/book/high-level-api/word2vec/no_test_word2vec_new_api.py +++ b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py @@ -90,7 +90,7 @@ def train_program(is_sparse): return avg_cost -def train(use_cuda, train_program, save_path): +def train(use_cuda, train_program, save_dirname): train_reader = paddle.batch( paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) test_reader = paddle.batch( @@ -99,27 +99,36 @@ def train(use_cuda, train_program, save_path): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): - outs = trainer.test(reader=test_reader) + if isinstance(event, fluid.EndStepEvent): + outs = trainer.test( + reader=test_reader, + feed_order=['firstw', 'secondw', 'thirdw', 'forthw', 'nextw']) avg_cost = outs[0] print("loss= ", avg_cost) - if avg_cost < 5.0: - trainer.save_params(save_path) - return + if avg_cost < 10.0: + trainer.save_params(save_dirname) + trainer.stop() + if math.isnan(avg_cost): sys.exit("got NaN loss, training failed.") trainer = fluid.Trainer( - train_program, fluid.optimizer.SGD(learning_rate=0.001), place=place) + train_func=train_program, + optimizer=fluid.optimizer.SGD(learning_rate=0.001), + place=place) + trainer.train( - reader=train_reader, num_epochs=1, event_handler=event_handler) + reader=train_reader, + num_epochs=1, + event_handler=event_handler, + feed_order=['firstw', 'secondw', 'thirdw', 'forthw', 'nextw']) -def infer(use_cuda, inference_program, save_path): +def infer(use_cuda, inference_program, save_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() inferencer = fluid.Inferencer( - infer_func=inference_program, param_path=save_path, place=place) + infer_func=inference_program, param_path=save_dirname, place=place) lod = [0, 1] first_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1) @@ -142,9 +151,17 @@ def main(use_cuda, is_sparse): if use_cuda and not fluid.core.is_compiled_with_cuda(): return - save_path = "word2vec.params" - train(use_cuda, partial(train_program, is_sparse), save_path) - infer(use_cuda, partial(inference_program, is_sparse), save_path) + save_path = "word2vec.inference.model" + + train( + use_cuda=use_cuda, + train_program=partial(train_program, is_sparse), + save_dirname=save_path) + + infer( + use_cuda=use_cuda, + inference_program=partial(inference_program, is_sparse), + save_dirname=save_path) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index 921260ef3f4b1f9e4c65b3ffb440dc34cb0a9376..8569d838bdd414eb84c6c87674990a25a2fdcdf9 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -109,6 +109,24 @@ class TestDetection(unittest.TestCase): print(str(program)) +class TestPriorBox(unittest.TestCase): + def test_prior_box(self): + data_shape = [3, 224, 224] + images = fluid.layers.data( + name='pixel', shape=data_shape, dtype='float32') + conv1 = fluid.layers.conv2d(images, 3, 3, 2) + box, var = layers.prior_box( + input=conv1, + image=images, + min_sizes=[100.0], + aspect_ratios=[1.], + flip=True, + clip=True) + assert len(box.shape) == 4 + assert box.shape == var.shape + assert box.shape[3] == 4 + + class TestMultiBoxHead(unittest.TestCase): def test_multi_box_head(self): data_shape = [3, 224, 224] diff --git 
a/python/paddle/fluid/tests/unittests/test_dist_train.py b/python/paddle/fluid/tests/unittests/test_dist_train.py index 77e9a8f7e72a9e0790ce1d1f48356abcca8eaccf..c2393a288c6ebb5dd4a12f7b591d12cc94f4ea55 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_train.py +++ b/python/paddle/fluid/tests/unittests/test_dist_train.py @@ -52,15 +52,18 @@ class TestSendOp(unittest.TestCase): serv = layers.ListenAndServ( "127.0.0.1:0", ["X"], optimizer_mode=False) with serv.do(): + out_var = main.global_block().create_var( + name="scale_0.tmp_0", + psersistable=True, + dtype="float32", + shape=[32, 32]) x = layers.data( shape=[32, 32], dtype='float32', name="X", append_batch_size=False) fluid.initializer.Constant(value=1.0)(x, main.global_block()) - o = layers.scale(x=x, scale=10.0) - main.global_block().create_var( - name=o.name, psersistable=False, dtype=o.dtype, shape=o.shape) + layers.scale(x=x, scale=10.0, out=out_var) self.server_exe = fluid.Executor(place) self.server_exe.run(main) diff --git a/python/paddle/fluid/tests/unittests/test_is_empty_op.py b/python/paddle/fluid/tests/unittests/test_is_empty_op.py index 4d11cf226be2ba4ffbe015198fed3191f1e02f72..11121d9b65351eab639b7618fac0e54714cf4680 100644 --- a/python/paddle/fluid/tests/unittests/test_is_empty_op.py +++ b/python/paddle/fluid/tests/unittests/test_is_empty_op.py @@ -14,42 +14,24 @@ import unittest import numpy as np -from paddle.fluid.op import Operator -import paddle.fluid.core as core +from op_test import OpTest -def create_tensor(scope, name, np_data): - tensor = scope.var(name).get_tensor() - tensor.set_dims(np_data.shape) - tensor.set(np_data, core.CPUPlace()) - return tensor - - -class TestIsEmptyOp(unittest.TestCase): +class TestEmpty(OpTest): def setUp(self): - self.scope = core.Scope() - # create input variables - np_data0 = np.array([0, 1, 2]) - create_tensor(self.scope, "X0", np_data0) - - np_data1 = np.array([1]) - t = create_tensor(self.scope, "X1", np_data1) - t.set_dims([0]) + self.op_type = "is_empty" + self.inputs = {'X': np.array([1, 2, 3])} + self.outputs = {'Out': np.array([False])} - # create output variables - self.scope.var("out") + def test_check_output(self): + self.check_output() - def test_no_empty(self): - self.one_case("X0", False) - def test_empty(self): - self.one_case("X1", True) - - def one_case(self, input, target): - op = Operator(type="is_empty", X=input, Out="out") - op.run(self.scope, core.CPUPlace()) - out = self.scope.var("out").get_tensor() - self.assertEqual(np.array(out)[0], target) +class TestNotEmpty(TestEmpty): + def setUp(self): + self.op_type = "is_empty" + self.inputs = {'X': np.array([])} + self.outputs = {'Out': np.array([True])} if __name__ == "__main__": diff --git a/python/paddle/fluid/trainer.py b/python/paddle/fluid/trainer.py index d158d586321833fdf046e4e061bfa8460b9a31b5..7da123dd92ed9d111d68cd70efb8ce1493452609 100644 --- a/python/paddle/fluid/trainer.py +++ b/python/paddle/fluid/trainer.py @@ -12,18 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import contextlib import os + import core -import framework -import executor + import data_feeder -import contextlib +import executor +import framework import io -import unique_name -import parallel_executor - # optimizer is same as the parameter of Trainer.__init__. 
Rename it to opt_module import optimizer as opt_module +import parallel_executor from transpiler import distribute_transpiler __all__ = [ @@ -100,6 +100,7 @@ class Trainer(object): param_path=None, place=None, parallel=False): + self.__stop = False self.parallel = parallel # 1. we need to generate a framework.Program by calling # program_func. Reference: fluid.program_guard in @@ -210,6 +211,12 @@ class Trainer(object): 'TRAINING_ROLE environment variable must be either TRAINER or PSERVER' ) + def stop(self): + """ + stop training + """ + self.__stop = True + def train(self, num_epochs, event_handler, reader=None, feed_order=None): """ Train the model. @@ -289,6 +296,8 @@ class Trainer(object): for epoch_id in range(num_epochs): event_handler(BeginEpochEvent(epoch_id)) for step_id, data in enumerate(reader()): + if self.__stop: + return begin_event = BeginStepEvent(epoch_id, step_id) event_handler(begin_event) if begin_event.fetch_metrics: @@ -327,9 +336,7 @@ class Trainer(object): feeder = data_feeder.DataFeeder( feed_list=feed_var_list, place=self.place) reader = feeder.decorate_reader(reader, multi_devices=True) - for epoch_id in range(num_epochs): - self._train_by_any_executor(event_handler, pe, num_epochs, - reader) + self._train_by_any_executor(event_handler, pe, num_epochs, reader) def _get_parallel_executor(self): return getattr(self, 'parallel_executor', None) diff --git a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py index 49034b47b2d184e4027bcebc29413a163340fdaa..80a8f7c09cfe521f8f94a27e85fc8d86c02b3e97 100644 --- a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py +++ b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py @@ -24,7 +24,8 @@ dtype_to_size = { core.VarDesc.VarType.INT16: 2, core.VarDesc.VarType.INT32: 4, core.VarDesc.VarType.INT64: 8, - core.VarDesc.VarType.BOOL: 1 + core.VarDesc.VarType.BOOL: 1, + core.VarDesc.VarType.UINT8: 1, } SUB_BLOCK_OPS = [ diff --git a/tools/timeline.py b/tools/timeline.py index 8cd6353d46f496831cb61c1cdbbd156ca0579fb4..b413bb6fe0505df8fb09fa0759fefb6509b95bc9 100644 --- a/tools/timeline.py +++ b/tools/timeline.py @@ -171,7 +171,7 @@ if args.timeline_path: profile_paths = profile_path.split(',') profile_dict = dict() -if len(profile_path) == 1: +if len(profile_paths) == 1: with open(profile_path, 'r') as f: profile_s = f.read() profile_pb = profiler_pb2.Profile()
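Pulling the trainer changes above together, a hedged sketch of the new early-stopping flow (train_program, the readers, the feed names, and "model_dir" are placeholders): the step loop now checks a private stop flag, so an event handler calls trainer.stop() instead of returning from train():

    import paddle.fluid as fluid

    trainer = fluid.Trainer(
        train_func=train_program,
        optimizer=fluid.optimizer.SGD(learning_rate=0.001),
        place=fluid.CPUPlace())

    def event_handler(event):
        if isinstance(event, fluid.EndStepEvent):
            test_metrics = trainer.test(reader=test_reader, feed_order=['x', 'y'])
            if float(test_metrics[0]) < 20.0:
                trainer.save_params("model_dir")  # hypothetical output directory
                trainer.stop()  # sets the flag the step loop checks before the next batch

    trainer.train(
        reader=train_reader,
        num_epochs=100,
        event_handler=event_handler,
        feed_order=['x', 'y'])
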