From 3e1e482b4960c8030753ade44c0aa61d89187642 Mon Sep 17 00:00:00 2001 From: cifar10 <41565156+cifar10@users.noreply.github.com> Date: Mon, 26 Sep 2022 15:41:16 +0800 Subject: [PATCH] [MLU] fluid: add mluop (#46429) --- cmake/neuware.cmake | 4 +- paddle/fluid/operators/mlu/mlu_baseop.cc | 220 +++++++++++++++++- paddle/fluid/operators/mlu/mlu_baseop.h | 121 ++++++++++ .../platform/device/mlu/device_context.cc | 11 +- .../platform/device/mlu/device_context.h | 19 ++ paddle/fluid/platform/device/mlu/enforce.h | 10 + paddle/fluid/platform/device/mlu/mlu_info.cc | 7 + paddle/fluid/platform/device/mlu/mlu_info.h | 8 +- .../unittests/mlu/test_grid_sampler_op_mlu.py | 39 ++-- .../unittests/mlu/test_reduce_sum_op_mlu.py | 5 + tools/dockerfile/Dockerfile.mlu | 37 +-- 11 files changed, 434 insertions(+), 47 deletions(-) diff --git a/cmake/neuware.cmake b/cmake/neuware.cmake index 16dbf16899..8c873f35b7 100644 --- a/cmake/neuware.cmake +++ b/cmake/neuware.cmake @@ -15,12 +15,14 @@ set(NEUWARE_LIB_DIR ${NEUWARE_HOME}/lib64) include_directories(${NEUWARE_INCLUDE_DIR}) set(CNNL_LIB ${NEUWARE_LIB_DIR}/libcnnl.so) +set(MLUOP_LIB ${NEUWARE_LIB_DIR}/libmluops.so) set(CNRT_LIB ${NEUWARE_LIB_DIR}/libcnrt.so) set(CNDRV_LIB ${NEUWARE_LIB_DIR}/libcndrv.so) set(CNPAPI_LIB ${NEUWARE_LIB_DIR}/libcnpapi.so) generate_dummy_static_lib(LIB_NAME "neuware_lib" GENERATOR "neuware.cmake") -set(NEUWARE_LIB_DEPS ${CNNL_LIB} ${CNRT_LIB} ${CNDRV_LIB} ${CNPAPI_LIB}) +set(NEUWARE_LIB_DEPS ${CNNL_LIB} ${MLUOP_LIB} ${CNRT_LIB} ${CNDRV_LIB} + ${CNPAPI_LIB}) if(WITH_CNCL) message(STATUS "Compile with CNCL!") diff --git a/paddle/fluid/operators/mlu/mlu_baseop.cc b/paddle/fluid/operators/mlu/mlu_baseop.cc index 4cd754775d..5d0ccc9f72 100644 --- a/paddle/fluid/operators/mlu/mlu_baseop.cc +++ b/paddle/fluid/operators/mlu/mlu_baseop.cc @@ -256,6 +256,186 @@ MLUCnnlTensorDesc::~MLUCnnlTensorDesc() { } } +class MLUOpTensorDescPool { + public: + mluOpTensorDescriptor_t Pop() { + mluOpTensorDescriptor_t raw_desc; + if (q_.try_dequeue(raw_desc)) { + return raw_desc; + } else { + mluOpCreateTensorDescriptor(&raw_desc); + return raw_desc; + } + } + + void Recycle(mluOpTensorDescriptor_t desc) { + mluOpResetTensorDescriptor(desc); + q_.enqueue(desc); + } + + ~MLUOpTensorDescPool() { + auto size = q_.size_approx(); + if (size > 0) { + std::vector vec(size); + q_.try_dequeue_bulk(vec.data(), size); + for (auto desc : vec) { + mluOpDestroyTensorDescriptor(desc); + } + } + } + + private: + moodycamel::ConcurrentQueue q_; +}; + +static MLUOpTensorDescPool g_mluop_tensor_desc_pool; + +MLUOpTensorDesc& MLUOpTensorDesc::operator=(MLUOpTensorDesc&& rhs) { + if (raw_tensor_desc) { + g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc); + } + raw_tensor_desc = rhs.raw_tensor_desc; + rhs.raw_tensor_desc = nullptr; + return *this; +} + +MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim, + const int dim_sizes[], + const mluOpDataType_t tensor_dtype) { + raw_tensor_desc = g_mluop_tensor_desc_pool.Pop(); + PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc, + MLUOP_LAYOUT_ARRAY, + tensor_dtype, + tensor_dim, + dim_sizes)); +} + +MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim, + const int dim_sizes[], + const mluOpDataType_t tensor_dtype, + const mluOpTensorLayout_t layout) { + raw_tensor_desc = g_mluop_tensor_desc_pool.Pop(); + PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor( + raw_tensor_desc, layout, tensor_dtype, tensor_dim, dim_sizes)); +} + +MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim, + const int dim_sizes[], + const mluOpDataType_t tensor_dtype, + int position) + : MLUOpTensorDesc(tensor_dim, dim_sizes, tensor_dtype) { + PADDLE_ENFORCE_MLU_SUCCESS( + mluOpSetTensorDescriptorPosition(raw_tensor_desc, position)); +} + +MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim, + const int64_t dim_sizes[], + const mluOpDataType_t tensor_dtype) { + std::vector dim_sizes_int32(tensor_dim); + std::vector::const_iterator int64_cbegin(dim_sizes); + std::vector::const_iterator int64_cend(dim_sizes + tensor_dim); + std::transform(int64_cbegin, + int64_cend, + dim_sizes_int32.begin(), + &CheckedNarrowing); + raw_tensor_desc = g_mluop_tensor_desc_pool.Pop(); + PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc, + MLUOP_LAYOUT_ARRAY, + tensor_dtype, + tensor_dim, + dim_sizes_int32.data())); +} + +MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim, + const int64_t dim_sizes[], + const mluOpDataType_t tensor_dtype, + const mluOpTensorLayout_t layout) { + std::vector dim_sizes_int32(tensor_dim); + std::vector::const_iterator int64_cbegin(dim_sizes); + std::vector::const_iterator int64_cend(dim_sizes + tensor_dim); + std::transform(int64_cbegin, + int64_cend, + dim_sizes_int32.begin(), + &CheckedNarrowing); + raw_tensor_desc = g_mluop_tensor_desc_pool.Pop(); + PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc, + layout, + tensor_dtype, + tensor_dim, + dim_sizes_int32.data())); +} + +MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim, + const int64_t dim_sizes[], + const mluOpDataType_t tensor_dtype, + int position) { + std::vector dim_sizes_int32(tensor_dim); + std::vector::const_iterator int64_cbegin(dim_sizes); + std::vector::const_iterator int64_cend(dim_sizes + tensor_dim); + std::transform(int64_cbegin, + int64_cend, + dim_sizes_int32.begin(), + &CheckedNarrowing); + raw_tensor_desc = g_mluop_tensor_desc_pool.Pop(); + PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc, + MLUOP_LAYOUT_ARRAY, + tensor_dtype, + tensor_dim, + dim_sizes_int32.data())); + PADDLE_ENFORCE_MLU_SUCCESS( + mluOpSetTensorDescriptorPosition(raw_tensor_desc, position)); +} + +MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor, + const mluOpTensorLayout_t layout, + const mluOpDataType_t tensor_dtype) { + auto dims = phi::vectorize(tensor.dims()); + int tensor_dim = dims.size(); + raw_tensor_desc = g_mluop_tensor_desc_pool.Pop(); + if (tensor_dim == 0) { + int scalar_dims[1] = {1}; + PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor( + raw_tensor_desc, layout, tensor_dtype, 1, scalar_dims)); + } else { + std::vector tensor_dim_sizes_int(dims.begin(), dims.end()); + PADDLE_ENFORCE_MLU_SUCCESS( + mluOpSetTensorDescriptor(raw_tensor_desc, + layout, + tensor_dtype, + tensor_dim, + tensor_dim_sizes_int.data())); + } +} + +MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor) + : MLUOpTensorDesc( + tensor, MLUOP_LAYOUT_ARRAY, ToMluOpDataType(tensor.dtype())) {} + +MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor, + mluOpTensorLayout_t layout, + const mluOpDataType_t tensor_dtype, + int position) + : MLUOpTensorDesc(tensor, layout, tensor_dtype) { + PADDLE_ENFORCE_MLU_SUCCESS( + mluOpSetTensorDescriptorPosition(raw_tensor_desc, position)); +} + +MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor, + mluOpTensorLayout_t layout, + const mluOpDataType_t tensor_dtype, + int position, + float scale) + : MLUOpTensorDesc(tensor, layout, tensor_dtype) { + PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptorPositionAndScale( + raw_tensor_desc, position, scale)); +} + +MLUOpTensorDesc::~MLUOpTensorDesc() { + if (raw_tensor_desc) { + g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc); + } +} + MLUCnnlActivationDesc::MLUCnnlActivationDesc( const cnnlActivationMode_t act_mode, const float ceof) { PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreateActivationDescriptor(&active_desc_)); @@ -1563,17 +1743,35 @@ MLURNNDesc::~MLURNNDesc() { void* indices_out) { cnnlHandle_t handle = GetHandleFromCTX(ctx); - PADDLE_ENFORCE_MLU_SUCCESS(cnnlTopKTensor(handle, - input_desc, - input, - k, - dim, - largest, - sorted, - values_output_desc, - values_out, - indices_output_desc, - indices_out)); + size_t workspace_size; + PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetTopKTensorWorkspaceSize(handle, + input_desc, + k, + dim, + largest, + values_output_desc, + indices_output_desc, + &workspace_size)); + + auto& dev_ctx = GetDevCtxFromCTX(ctx); + Tensor workspace = ctx.AllocateTmpTensor( + {static_cast(workspace_size)}, dev_ctx); + void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); + + PADDLE_ENFORCE_MLU_SUCCESS(cnnlTopKTensor_v3(handle, + input_desc, + input, + k, + dim, + largest, + sorted, + false /*lower_index_first*/, + workspace_ptr, + workspace_size, + values_output_desc, + values_out, + indices_output_desc, + indices_out)); } /* static */ void MLUCnnl::StridedSlice( diff --git a/paddle/fluid/operators/mlu/mlu_baseop.h b/paddle/fluid/operators/mlu/mlu_baseop.h index e56331b272..4c728df4e4 100644 --- a/paddle/fluid/operators/mlu/mlu_baseop.h +++ b/paddle/fluid/operators/mlu/mlu_baseop.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include +#include #include #include @@ -138,6 +139,54 @@ inline cnnlDataType_t ToCnnlDataType() { return ToCnnlDataType(type); } +inline mluOpDataType_t ToMluOpDataType( + const paddle::experimental::DataType& dtype) { + mluOpDataType_t type = MLUOP_DTYPE_FLOAT; + switch (dtype) { + case DataType::FLOAT16: + type = MLUOP_DTYPE_HALF; + break; + case DataType::FLOAT32: + type = MLUOP_DTYPE_FLOAT; + break; + case DataType::FLOAT64: + type = MLUOP_DTYPE_DOUBLE; + break; + case DataType::INT8: + type = MLUOP_DTYPE_INT8; + break; + case DataType::INT16: + type = MLUOP_DTYPE_INT16; + break; + case DataType::INT32: + type = MLUOP_DTYPE_INT32; + break; + case DataType::INT64: + type = MLUOP_DTYPE_INT64; + break; + case DataType::BOOL: + type = MLUOP_DTYPE_BOOL; + break; + case DataType::UINT8: + type = MLUOP_DTYPE_UINT8; + break; + default: + break; + } + return type; +} + +inline mluOpDataType_t ToMluOpDataType( + const paddle::framework::proto::VarType::Type& type) { + return ToMluOpDataType(framework::TransToPhiDataType(type)); +} + +template +inline mluOpDataType_t ToMluOpDataType() { + auto type = framework::ToDataType(std::type_index(typeid(T))); + return ToMluOpDataType(type); +} + // Converts (via narrowing) a type T value to a type U, and checks that the // value has no value change due to the conversion. template @@ -152,6 +201,10 @@ inline static cnnlHandle_t GetHandleFromCTX(const ExecutionContext& ctx) { return ctx.template device_context().cnnl_handle(); } +inline static mluOpHandle_t GetMLUOpHandleFromCTX(const ExecutionContext& ctx) { + return ctx.template device_context().mluOp_handle(); +} + inline static const MLUDeviceContext& GetDevCtxFromCTX( const ExecutionContext& ctx) { return ctx.template device_context(); @@ -281,6 +334,74 @@ class MLUCnnlTensorDesc { cnnlTensorDescriptor_t raw_tensor_desc = nullptr; }; +class MLUOpTensorDesc { + public: + MLUOpTensorDesc() {} + + // SE_DISALLOW_COPY_AND_ASSIGN + MLUOpTensorDesc(const MLUOpTensorDesc& desc) = delete; + MLUOpTensorDesc& operator=(const MLUOpTensorDesc&) = delete; + + MLUOpTensorDesc(MLUOpTensorDesc&& rhs) + : raw_tensor_desc(rhs.raw_tensor_desc) { + rhs.raw_tensor_desc = nullptr; + } + + MLUOpTensorDesc& operator=(MLUOpTensorDesc&& rhs); + + MLUOpTensorDesc(const int tensor_dim, + const int dim_sizes[], + const mluOpDataType_t tensor_dtype); + + MLUOpTensorDesc(const int tensor_dim, + const int dim_sizes[], + const mluOpDataType_t tensor_dtype, + const mluOpTensorLayout_t layout); + + MLUOpTensorDesc(const int tensor_dim, + const int dim_sizes[], + const mluOpDataType_t tensor_dtype, + int position); + + MLUOpTensorDesc(const int tensor_dim, + const int64_t dim_sizes[], + const mluOpDataType_t tensor_dtype); + + MLUOpTensorDesc(const int tensor_dim, + const int64_t dim_sizes[], + const mluOpDataType_t tensor_dtype, + const mluOpTensorLayout_t layout); + + MLUOpTensorDesc(const int tensor_dim, + const int64_t dim_sizes[], + const mluOpDataType_t tensor_dtype, + int position); + + MLUOpTensorDesc(const Tensor& tensor, + const mluOpTensorLayout_t layout, + const mluOpDataType_t tensor_dtype); + + explicit MLUOpTensorDesc(const Tensor& tensor); + + MLUOpTensorDesc(const Tensor& tensor, + mluOpTensorLayout_t layout, + const mluOpDataType_t tensor_dtype, + int position); + + MLUOpTensorDesc(const Tensor& tensor, + mluOpTensorLayout_t layout, + const mluOpDataType_t tensor_dtype, + int position, + float scale); + + ~MLUOpTensorDesc(); + + const mluOpTensorDescriptor_t get() const { return raw_tensor_desc; } + + private: + mluOpTensorDescriptor_t raw_tensor_desc = nullptr; +}; + class MLUCnnlActivationDesc { public: MLUCnnlActivationDesc(const MLUCnnlActivationDesc& desc) = delete; diff --git a/paddle/fluid/platform/device/mlu/device_context.cc b/paddle/fluid/platform/device/mlu/device_context.cc index 087b480332..796d700683 100644 --- a/paddle/fluid/platform/device/mlu/device_context.cc +++ b/paddle/fluid/platform/device/mlu/device_context.cc @@ -28,11 +28,13 @@ MLUContext::MLUContext(const MLUPlace& place, const int priority) { MLUDeviceGuard guard(place_.device); stream_.reset(new stream::MLUStream(place_, priority)); InitCNNLContext(); + InitMLUOPContext(); } MLUContext::~MLUContext() { MLUDeviceGuard guard(place_.device); DestoryCNNLContext(); + DestoryMLUOPContext(); } MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) { @@ -41,6 +43,7 @@ MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) { driver_version_ = GetMLUDriverVersion(place_.device); runtime_version_ = GetMLURuntimeVersion(place_.device); cnnl_version_ = GetMLUCnnlVersion(place_.device); + mluOp_version_ = GetMLUOpVersion(place_.device); LOG_FIRST_N(WARNING, 1) << "Please NOTE: device: " << static_cast(place_.device) @@ -51,7 +54,9 @@ MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) { << ", Runtime API Version: " << runtime_version_ / 10000 << "." << (runtime_version_ / 100) % 100 << "." << runtime_version_ % 100 << ", Cnnl API Version: " << cnnl_version_ / 10000 << "." - << (cnnl_version_ / 100) % 100 << "." << cnnl_version_ % 100; + << (cnnl_version_ / 100) % 100 << "." << cnnl_version_ % 100 + << ", MluOp API Version: " << mluOp_version_ / 10000 << "." + << (mluOp_version_ / 100) % 100 << "." << mluOp_version_ % 100; default_ctx_.reset(new MLUContext(place_)); } @@ -70,6 +75,10 @@ mluCnnlHandle MLUDeviceContext::cnnl_handle() const { return context()->CnnlHandle(); } +mluOpHandle MLUDeviceContext::mluOp_handle() const { + return context()->MluOpHandle(); +} + mluStream MLUDeviceContext::stream() const { return context()->RawStream(); } #endif diff --git a/paddle/fluid/platform/device/mlu/device_context.h b/paddle/fluid/platform/device/mlu/device_context.h index d8bb762315..e1028667bc 100644 --- a/paddle/fluid/platform/device/mlu/device_context.h +++ b/paddle/fluid/platform/device/mlu/device_context.h @@ -53,12 +53,19 @@ class MLUContext { const mluCnnlHandle& CnnlHandle() const { return cnnl_handle_; } + const mluOpHandle& MluOpHandle() const { return mluOp_handle_; } + private: void InitCNNLContext() { PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreate(&cnnl_handle_)); PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetQueue(cnnl_handle_, RawStream())); } + void InitMLUOPContext() { + PADDLE_ENFORCE_MLU_SUCCESS(mluOpCreate(&mluOp_handle_)); + PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetQueue(mluOp_handle_, RawStream())); + } + void DestoryCNNLContext() { if (cnnl_handle_) { PADDLE_ENFORCE_MLU_SUCCESS(cnnlDestroy(cnnl_handle_)); @@ -66,10 +73,18 @@ class MLUContext { cnnl_handle_ = nullptr; } + void DestoryMLUOPContext() { + if (mluOp_handle_) { + PADDLE_ENFORCE_MLU_SUCCESS(mluOpDestroy(mluOp_handle_)); + } + mluOp_handle_ = nullptr; + } + MLUPlace place_; std::unique_ptr eigen_device_; std::unique_ptr stream_; mluCnnlHandle cnnl_handle_; + mluOpHandle mluOp_handle_; DISABLE_COPY_AND_ASSIGN(MLUContext); }; @@ -89,6 +104,9 @@ class MLUDeviceContext : public DeviceContext { /*! \brief Return cnnl handle in the device context. */ mluCnnlHandle cnnl_handle() const; + /*! \brief Return mluOp handle in the device context. */ + mluOpHandle mluOp_handle() const; + /*! \brief Return mlu stream in the device context. */ mluStream stream() const; @@ -135,6 +153,7 @@ class MLUDeviceContext : public DeviceContext { int driver_version_; int runtime_version_; int cnnl_version_; + int mluOp_version_; MLUPlace place_; std::shared_ptr default_ctx_; diff --git a/paddle/fluid/platform/device/mlu/enforce.h b/paddle/fluid/platform/device/mlu/enforce.h index 05327a771d..8b0d0bb36f 100644 --- a/paddle/fluid/platform/device/mlu/enforce.h +++ b/paddle/fluid/platform/device/mlu/enforce.h @@ -41,6 +41,7 @@ struct MLUStatusType {}; DEFINE_MLU_STATUS_TYPE(cnrtStatus, cnrtSuccess, CNRT); DEFINE_MLU_STATUS_TYPE(cnnlStatus, CNNL_STATUS_SUCCESS, CNNL); +DEFINE_MLU_STATUS_TYPE(mluOpStatus, MLUOP_STATUS_SUCCESS, MLUOP); DEFINE_MLU_STATUS_TYPE(cnStatus, CN_SUCCESS, CN); #ifdef PADDLE_WITH_CNCL DEFINE_MLU_STATUS_TYPE(cnclStatus, CNCL_RET_SUCCESS, CNCL); @@ -68,6 +69,15 @@ inline std::string build_mlu_error_msg(cnnlStatus stat) { return sout.str(); } +/*************** MLU OP ERROR ***************/ +inline bool is_error(mluOpStatus stat) { return stat != MLUOP_STATUS_SUCCESS; } + +inline std::string build_mlu_error_msg(mluOpStatus stat) { + std::ostringstream sout; + sout << "MLU OP error(" << stat << "), " << mluOpGetErrorString(stat) << ". "; + return sout.str(); +} + /*************** CN API ERROR ***************/ inline bool is_error(cnStatus stat) { return stat != CN_SUCCESS; } diff --git a/paddle/fluid/platform/device/mlu/mlu_info.cc b/paddle/fluid/platform/device/mlu/mlu_info.cc index e27720849e..a2e063397b 100644 --- a/paddle/fluid/platform/device/mlu/mlu_info.cc +++ b/paddle/fluid/platform/device/mlu/mlu_info.cc @@ -126,6 +126,13 @@ int GetMLUCnnlVersion(int id) { return x * 10000 + y * 100 + z; } +int GetMLUOpVersion(int id) { + CheckDeviceId(id); + int x, y, z; + mluOpGetLibVersion(&x, &y, &z); + return x * 10000 + y * 100 + z; +} + int GetMLUCurrentDeviceId() { int device_id; PADDLE_ENFORCE_MLU_SUCCESS(cnrtGetDevice(&device_id)); diff --git a/paddle/fluid/platform/device/mlu/mlu_info.h b/paddle/fluid/platform/device/mlu/mlu_info.h index 14f37879ef..c0cd24f00f 100644 --- a/paddle/fluid/platform/device/mlu/mlu_info.h +++ b/paddle/fluid/platform/device/mlu/mlu_info.h @@ -16,10 +16,11 @@ limitations under the License. */ #ifdef PADDLE_WITH_MLU #include -#include #include #include +#include #include +#include #ifdef PADDLE_WITH_CNCL #include #endif @@ -30,11 +31,13 @@ namespace paddle { using cnStatus = CNresult; using cnrtStatus = cnrtRet_t; using cnnlStatus = cnnlStatus_t; +using mluOpStatus = mluOpStatus_t; #ifdef PADDLE_WITH_CNCL using cnclStatus = cnclResult_t; #endif using mluStream = cnrtQueue_t; using mluCnnlHandle = cnnlHandle_t; +using mluOpHandle = mluOpHandle_t; using mluEventHandle = cnrtNotifier_t; using mluDeviceHandle = CNdev; @@ -49,6 +52,9 @@ int GetMLURuntimeVersion(int id); //! Get the cnnl version of the ith MLU. int GetMLUCnnlVersion(int id); +//! Get the mluOp version of the ith MLU. +int GetMLUOpVersion(int id); + //! Get the total number of MLU devices in system. int GetMLUDeviceCount(); diff --git a/python/paddle/fluid/tests/unittests/mlu/test_grid_sampler_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_grid_sampler_op_mlu.py index 032c2e9a50..96dbaab9ee 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_grid_sampler_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_grid_sampler_op_mlu.py @@ -186,16 +186,15 @@ class TestGridSamplerOp(OpTest): self.mode = "bilinear" -# TODO(fwg): Test this case when cnnl support align_corners = True. -# class Case1(TestGridSamplerOp): -# -# def initTestCase(self): -# self.x_shape = (2, 3, 5, 6) -# self.grid_shape = (2, 8, 9, 2) -# self.theta_shape = (2, 2, 3) -# self.align_corners = True -# self.padding_mode = "zeros" -# self.mode = "bilinear" +class Case1(TestGridSamplerOp): + + def initTestCase(self): + self.x_shape = (2, 3, 5, 6) + self.grid_shape = (2, 8, 9, 2) + self.theta_shape = (2, 2, 3) + self.align_corners = True + self.padding_mode = "zeros" + self.mode = "bilinear" class LargeInputCase(TestGridSamplerOp): @@ -209,16 +208,16 @@ class LargeInputCase(TestGridSamplerOp): self.mode = "bilinear" -# TODO(fwg): Test this case when cnnl support align_corners = True. -# class Case2(LargeInputCase): -# -# def initTestCase(self): -# self.x_shape = (2, 3, 128, 128) -# self.grid_shape = (2, 130, 130, 2) -# self.theta_shape = (2, 2, 3) -# self.align_corners = True -# self.padding_mode = "zeros" -# self.mode = "bilinear" +class Case2(LargeInputCase): + + def initTestCase(self): + self.x_shape = (2, 3, 128, 128) + self.grid_shape = (2, 130, 130, 2) + self.theta_shape = (2, 2, 3) + self.align_corners = True + self.padding_mode = "zeros" + self.mode = "bilinear" + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/mlu/test_reduce_sum_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_reduce_sum_op_mlu.py index ab98418744..3a9f0f3f6f 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_reduce_sum_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_reduce_sum_op_mlu.py @@ -152,6 +152,11 @@ class TestKeepDim8DReduce(TestMLUReduceSumOp): self.axis = (3, 4, 5) self.keep_dim = True + def test_check_grad(self): + self.check_grad_with_place(self.place, ['X'], + 'Out', + max_relative_error=0.03) + class TestReduceAll(TestMLUReduceSumOp): diff --git a/tools/dockerfile/Dockerfile.mlu b/tools/dockerfile/Dockerfile.mlu index b3edb25fd5..65ab49dd77 100644 --- a/tools/dockerfile/Dockerfile.mlu +++ b/tools/dockerfile/Dockerfile.mlu @@ -1,15 +1,17 @@ # A image for building paddle binaries -# Update CNTOOLKIT_VERSION, CNNL_VERSION and CNCL_VERSION if using other versions +# Update CNTOOLKIT_VERSION, CNNL_VERSION, CNCL_VERSION and MLUOPS_VERSION if using other versions # # Build: -# - CNTOOLKIT_VERSION 2.8.5 -# - CNNL_VERSION 1.10.5 -# - CNCL_VERSION 1.1.2 +# - CNTOOLKIT_VERSION 3.0.2-1 +# - CNNL_VERSION 1.13.0-1 +# - CNCL_VERSION 1.2.1-1 +# - MLUOPS_VERSION 0.2.0-1 # # Download three packages from FTP (need to connect cambricon AE to get FTP url) -# - cntoolkit_2.8.5.ubuntu18.04_amd64.deb -# - cnnl_1.10.5.ubuntu18.04_amd64.deb -# - cncl_1.1.2.ubuntu18.04_amd64.deb +# - cntoolkit_3.0.2-1.ubuntu18.04_amd64.deb +# - cnnl_1.13.0-1.ubuntu18.04_amd64.deb +# - cncl_1.2.1-1.ubuntu18.04_amd64.deb +# - mluops_0.2.0-1.ubuntu18.04_amd64.deb # copy them to current directory first, then run build commands # # For example: @@ -19,11 +21,13 @@ # (get cntoolkit pkg) # (get cnnl pkg) # (get cncl pkg) +# (get mluops pkg) # # docker build -f Dockerfile.mlu \ -# --build-arg CNTOOLKIT_VERSION=2.8.5 \ -# --build-arg CNNL_VERSION=1.10.5 \ -# --build-arg CNCL_VERSION=1.1.2 \ +# --build-arg CNTOOLKIT_VERSION=3.0.2-1 \ +# --build-arg CNNL_VERSION=1.13.0-1 \ +# --build-arg CNCL_VERSION=1.2.1-1 \ +# --build-arg MLUOPS_VERSION=0.2.0-1 \ # -t paddlepaddle/paddle:latest-dev-mlu . # # without mlu device: @@ -40,12 +44,14 @@ MAINTAINER PaddlePaddle Authors ENV WITH_GPU=OFF -ARG CNTOOLKIT_VERSION=2.8.5 -ARG CNNL_VERSION=1.10.5 -ARG CNCL_VERSION=1.1.2 +ARG CNTOOLKIT_VERSION=3.0.2-1 +ARG CNNL_VERSION=1.13.0-1 +ARG CNCL_VERSION=1.2.1-1 +ARG MLUOPS_VERSION=0.2.0-1 ARG CNTOOLKIT_PKG=cntoolkit_$CNTOOLKIT_VERSION.ubuntu18.04_amd64.deb ARG CNNL_PKG=cnnl_$CNNL_VERSION.ubuntu18.04_amd64.deb ARG CNCL_PKG=cncl_$CNCL_VERSION.ubuntu18.04_amd64.deb +ARG MLUOPS_PKG=mluops_$MLUOPS_VERSION.ubuntu18.04_amd64.deb # install cntoolkit COPY $CNTOOLKIT_PKG ./ @@ -67,6 +73,11 @@ COPY $CNCL_PKG ./ RUN dpkg -i $CNCL_PKG && \ rm -f $CNCL_PKG +# install mluops +COPY $MLUOPS_PKG ./ +RUN dpkg -i $MLUOPS_PKG && \ + rm -f $MLUOPS_PKG + # Clean RUN apt-get clean -y -- GitLab