未验证 提交 3e1e482b 编写于 作者: C cifar10 提交者: GitHub

[MLU] fluid: add mluop (#46429)

上级 b0ec8efb
...@@ -15,12 +15,14 @@ set(NEUWARE_LIB_DIR ${NEUWARE_HOME}/lib64) ...@@ -15,12 +15,14 @@ set(NEUWARE_LIB_DIR ${NEUWARE_HOME}/lib64)
include_directories(${NEUWARE_INCLUDE_DIR}) include_directories(${NEUWARE_INCLUDE_DIR})
set(CNNL_LIB ${NEUWARE_LIB_DIR}/libcnnl.so) set(CNNL_LIB ${NEUWARE_LIB_DIR}/libcnnl.so)
set(MLUOP_LIB ${NEUWARE_LIB_DIR}/libmluops.so)
set(CNRT_LIB ${NEUWARE_LIB_DIR}/libcnrt.so) set(CNRT_LIB ${NEUWARE_LIB_DIR}/libcnrt.so)
set(CNDRV_LIB ${NEUWARE_LIB_DIR}/libcndrv.so) set(CNDRV_LIB ${NEUWARE_LIB_DIR}/libcndrv.so)
set(CNPAPI_LIB ${NEUWARE_LIB_DIR}/libcnpapi.so) set(CNPAPI_LIB ${NEUWARE_LIB_DIR}/libcnpapi.so)
generate_dummy_static_lib(LIB_NAME "neuware_lib" GENERATOR "neuware.cmake") generate_dummy_static_lib(LIB_NAME "neuware_lib" GENERATOR "neuware.cmake")
set(NEUWARE_LIB_DEPS ${CNNL_LIB} ${CNRT_LIB} ${CNDRV_LIB} ${CNPAPI_LIB}) set(NEUWARE_LIB_DEPS ${CNNL_LIB} ${MLUOP_LIB} ${CNRT_LIB} ${CNDRV_LIB}
${CNPAPI_LIB})
if(WITH_CNCL) if(WITH_CNCL)
message(STATUS "Compile with CNCL!") message(STATUS "Compile with CNCL!")
......
...@@ -256,6 +256,186 @@ MLUCnnlTensorDesc::~MLUCnnlTensorDesc() { ...@@ -256,6 +256,186 @@ MLUCnnlTensorDesc::~MLUCnnlTensorDesc() {
} }
} }
class MLUOpTensorDescPool {
public:
mluOpTensorDescriptor_t Pop() {
mluOpTensorDescriptor_t raw_desc;
if (q_.try_dequeue(raw_desc)) {
return raw_desc;
} else {
mluOpCreateTensorDescriptor(&raw_desc);
return raw_desc;
}
}
void Recycle(mluOpTensorDescriptor_t desc) {
mluOpResetTensorDescriptor(desc);
q_.enqueue(desc);
}
~MLUOpTensorDescPool() {
auto size = q_.size_approx();
if (size > 0) {
std::vector<mluOpTensorDescriptor_t> vec(size);
q_.try_dequeue_bulk(vec.data(), size);
for (auto desc : vec) {
mluOpDestroyTensorDescriptor(desc);
}
}
}
private:
moodycamel::ConcurrentQueue<mluOpTensorDescriptor_t> q_;
};
static MLUOpTensorDescPool g_mluop_tensor_desc_pool;
// Move assignment: recycles the descriptor currently held (if any) and takes
// over the one owned by `rhs`, leaving `rhs` empty.
MLUOpTensorDesc& MLUOpTensorDesc::operator=(MLUOpTensorDesc&& rhs) {
  // Self-move must be a no-op. Without this guard the live descriptor would
  // be recycled into the pool and this object would end up holding nullptr.
  if (this == &rhs) {
    return *this;
  }
  if (raw_tensor_desc) {
    g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
  }
  raw_tensor_desc = rhs.raw_tensor_desc;
  rhs.raw_tensor_desc = nullptr;
  return *this;
}
// Describes a tensor with `tensor_dim` int dimensions taken from `dim_sizes`,
// using the MLUOP_LAYOUT_ARRAY layout and the given data type.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int dim_sizes[],
                                 const mluOpDataType_t tensor_dtype) {
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  const mluOpTensorLayout_t array_layout = MLUOP_LAYOUT_ARRAY;
  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(
      raw_tensor_desc, array_layout, tensor_dtype, tensor_dim, dim_sizes));
}
// Describes a tensor with `tensor_dim` int dimensions taken from `dim_sizes`,
// using the caller-supplied layout and data type.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int dim_sizes[],
                                 const mluOpDataType_t tensor_dtype,
                                 const mluOpTensorLayout_t layout) {
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  const auto set_status = mluOpSetTensorDescriptor(
      raw_tensor_desc, layout, tensor_dtype, tensor_dim, dim_sizes);
  PADDLE_ENFORCE_MLU_SUCCESS(set_status);
}
// Builds the descriptor through the (tensor_dim, dim_sizes, tensor_dtype)
// constructor, then applies `position` with
// mluOpSetTensorDescriptorPosition.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int dim_sizes[],
                                 const mluOpDataType_t tensor_dtype,
                                 int position)
    : MLUOpTensorDesc(tensor_dim, dim_sizes, tensor_dtype) {
  const auto position_status =
      mluOpSetTensorDescriptorPosition(raw_tensor_desc, position);
  PADDLE_ENFORCE_MLU_SUCCESS(position_status);
}
// Describes a tensor whose dimensions are given as int64_t values. Each
// dimension is converted to int with CheckedNarrowing before the descriptor
// is set up with the MLUOP_LAYOUT_ARRAY layout.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int64_t dim_sizes[],
                                 const mluOpDataType_t tensor_dtype) {
  std::vector<int> dim_sizes_int32(tensor_dim);
  // Raw pointers are valid input iterators. Building a
  // std::vector<int64_t>::const_iterator from a pointer is not portable
  // across standard library implementations.
  std::transform(dim_sizes,
                 dim_sizes + tensor_dim,
                 dim_sizes_int32.begin(),
                 &CheckedNarrowing<int64_t, int>);
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc,
                                                      MLUOP_LAYOUT_ARRAY,
                                                      tensor_dtype,
                                                      tensor_dim,
                                                      dim_sizes_int32.data()));
}
// Describes a tensor whose dimensions are given as int64_t values, using the
// caller-supplied layout. Each dimension is converted to int with
// CheckedNarrowing before the descriptor is set up.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int64_t dim_sizes[],
                                 const mluOpDataType_t tensor_dtype,
                                 const mluOpTensorLayout_t layout) {
  std::vector<int> dim_sizes_int32(tensor_dim);
  // Raw pointers are valid input iterators. Building a
  // std::vector<int64_t>::const_iterator from a pointer is not portable
  // across standard library implementations.
  std::transform(dim_sizes,
                 dim_sizes + tensor_dim,
                 dim_sizes_int32.begin(),
                 &CheckedNarrowing<int64_t, int>);
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc,
                                                      layout,
                                                      tensor_dtype,
                                                      tensor_dim,
                                                      dim_sizes_int32.data()));
}
// Describes a tensor whose dimensions are given as int64_t values with the
// MLUOP_LAYOUT_ARRAY layout, then applies `position` with
// mluOpSetTensorDescriptorPosition. Each dimension is converted to int with
// CheckedNarrowing first.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int64_t dim_sizes[],
                                 const mluOpDataType_t tensor_dtype,
                                 int position) {
  std::vector<int> dim_sizes_int32(tensor_dim);
  // Raw pointers are valid input iterators. Building a
  // std::vector<int64_t>::const_iterator from a pointer is not portable
  // across standard library implementations.
  std::transform(dim_sizes,
                 dim_sizes + tensor_dim,
                 dim_sizes_int32.begin(),
                 &CheckedNarrowing<int64_t, int>);
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc,
                                                      MLUOP_LAYOUT_ARRAY,
                                                      tensor_dtype,
                                                      tensor_dim,
                                                      dim_sizes_int32.data()));
  PADDLE_ENFORCE_MLU_SUCCESS(
      mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
}
// Describes `tensor` with the given layout and data type. The dimensions are
// read from tensor.dims(); a tensor with no dimensions is described as a
// one-dimensional tensor holding a single element.
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
                                 const mluOpTensorLayout_t layout,
                                 const mluOpDataType_t tensor_dtype) {
  auto dims = phi::vectorize<int>(tensor.dims());
  int tensor_dim = dims.size();
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  if (tensor_dim == 0) {
    int scalar_dims[1] = {1};
    PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(
        raw_tensor_desc, layout, tensor_dtype, 1, scalar_dims));
  } else {
    // `dims` already holds the sizes as int, so it is passed directly
    // instead of being copied into a second vector.
    PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(
        raw_tensor_desc, layout, tensor_dtype, tensor_dim, dims.data()));
  }
}
// Describes `tensor` with the MLUOP_LAYOUT_ARRAY layout and the mluOp data
// type that ToMluOpDataType derives from tensor.dtype().
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor)
    : MLUOpTensorDesc(tensor,
                      MLUOP_LAYOUT_ARRAY,
                      ToMluOpDataType(tensor.dtype())) {
}
// Builds the descriptor through the (tensor, layout, tensor_dtype)
// constructor, then applies `position` with
// mluOpSetTensorDescriptorPosition.
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
                                 mluOpTensorLayout_t layout,
                                 const mluOpDataType_t tensor_dtype,
                                 int position)
    : MLUOpTensorDesc(tensor, layout, tensor_dtype) {
  const auto position_status =
      mluOpSetTensorDescriptorPosition(raw_tensor_desc, position);
  PADDLE_ENFORCE_MLU_SUCCESS(position_status);
}
// Builds the descriptor through the (tensor, layout, tensor_dtype)
// constructor, then applies `position` and `scale` with
// mluOpSetTensorDescriptorPositionAndScale.
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
                                 mluOpTensorLayout_t layout,
                                 const mluOpDataType_t tensor_dtype,
                                 int position,
                                 float scale)
    : MLUOpTensorDesc(tensor, layout, tensor_dtype) {
  const auto quant_status = mluOpSetTensorDescriptorPositionAndScale(
      raw_tensor_desc, position, scale);
  PADDLE_ENFORCE_MLU_SUCCESS(quant_status);
}
// Hands the held descriptor back to the pool. An empty (default-constructed
// or moved-from) object holds nullptr and has nothing to recycle.
MLUOpTensorDesc::~MLUOpTensorDesc() {
  if (!raw_tensor_desc) {
    return;
  }
  g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
}
MLUCnnlActivationDesc::MLUCnnlActivationDesc( MLUCnnlActivationDesc::MLUCnnlActivationDesc(
const cnnlActivationMode_t act_mode, const float ceof) { const cnnlActivationMode_t act_mode, const float ceof) {
PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreateActivationDescriptor(&active_desc_)); PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreateActivationDescriptor(&active_desc_));
...@@ -1563,17 +1743,35 @@ MLURNNDesc::~MLURNNDesc() { ...@@ -1563,17 +1743,35 @@ MLURNNDesc::~MLURNNDesc() {
void* indices_out) { void* indices_out) {
cnnlHandle_t handle = GetHandleFromCTX(ctx); cnnlHandle_t handle = GetHandleFromCTX(ctx);
PADDLE_ENFORCE_MLU_SUCCESS(cnnlTopKTensor(handle, size_t workspace_size;
input_desc, PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetTopKTensorWorkspaceSize(handle,
input, input_desc,
k, k,
dim, dim,
largest, largest,
sorted, values_output_desc,
values_output_desc, indices_output_desc,
values_out, &workspace_size));
indices_output_desc,
indices_out)); auto& dev_ctx = GetDevCtxFromCTX(ctx);
Tensor workspace = ctx.AllocateTmpTensor<int8_t, MLUDeviceContext>(
{static_cast<int64_t>(workspace_size)}, dev_ctx);
void* workspace_ptr = workspace.mutable_data(ctx.GetPlace());
PADDLE_ENFORCE_MLU_SUCCESS(cnnlTopKTensor_v3(handle,
input_desc,
input,
k,
dim,
largest,
sorted,
false /*lower_index_first*/,
workspace_ptr,
workspace_size,
values_output_desc,
values_out,
indices_output_desc,
indices_out));
} }
/* static */ void MLUCnnl::StridedSlice( /* static */ void MLUCnnl::StridedSlice(
......
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <cn_api.h> #include <cn_api.h>
#include <cnnl.h> #include <cnnl.h>
#include <concurrentqueue.h> #include <concurrentqueue.h>
#include <mlu_op.h>
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -138,6 +139,54 @@ inline cnnlDataType_t ToCnnlDataType() { ...@@ -138,6 +139,54 @@ inline cnnlDataType_t ToCnnlDataType() {
return ToCnnlDataType(type); return ToCnnlDataType(type);
} }
// Maps a paddle DataType to the matching mluOp data type. Any dtype without
// an explicit case below maps to MLUOP_DTYPE_FLOAT.
inline mluOpDataType_t ToMluOpDataType(
    const paddle::experimental::DataType& dtype) {
  switch (dtype) {
    case DataType::FLOAT16:
      return MLUOP_DTYPE_HALF;
    case DataType::FLOAT32:
      return MLUOP_DTYPE_FLOAT;
    case DataType::FLOAT64:
      return MLUOP_DTYPE_DOUBLE;
    case DataType::INT8:
      return MLUOP_DTYPE_INT8;
    case DataType::INT16:
      return MLUOP_DTYPE_INT16;
    case DataType::INT32:
      return MLUOP_DTYPE_INT32;
    case DataType::INT64:
      return MLUOP_DTYPE_INT64;
    case DataType::BOOL:
      return MLUOP_DTYPE_BOOL;
    case DataType::UINT8:
      return MLUOP_DTYPE_UINT8;
    default:
      return MLUOP_DTYPE_FLOAT;
  }
}
// Maps a framework VarType to the mluOp data type by first translating it
// with framework::TransToPhiDataType.
inline mluOpDataType_t ToMluOpDataType(
    const paddle::framework::proto::VarType::Type& type) {
  const auto phi_dtype = framework::TransToPhiDataType(type);
  return ToMluOpDataType(phi_dtype);
}
// Maps the C++ type T to the mluOp data type: T is resolved with
// framework::ToDataType and the result is forwarded to the matching
// ToMluOpDataType overload.
template <typename T>
inline mluOpDataType_t ToMluOpDataType() {
  return ToMluOpDataType(framework::ToDataType(std::type_index(typeid(T))));
}
// Converts (via narrowing) a type T value to a type U, and checks that the // Converts (via narrowing) a type T value to a type U, and checks that the
// value has no value change due to the conversion. // value has no value change due to the conversion.
template <typename WideT, typename NarrowT> template <typename WideT, typename NarrowT>
...@@ -152,6 +201,10 @@ inline static cnnlHandle_t GetHandleFromCTX(const ExecutionContext& ctx) { ...@@ -152,6 +201,10 @@ inline static cnnlHandle_t GetHandleFromCTX(const ExecutionContext& ctx) {
return ctx.template device_context<MLUDeviceContext>().cnnl_handle(); return ctx.template device_context<MLUDeviceContext>().cnnl_handle();
} }
// Returns the mluOp handle of the MLUDeviceContext attached to `ctx`.
inline static mluOpHandle_t GetMLUOpHandleFromCTX(const ExecutionContext& ctx) {
  const auto& mlu_dev_ctx = ctx.template device_context<MLUDeviceContext>();
  return mlu_dev_ctx.mluOp_handle();
}
inline static const MLUDeviceContext& GetDevCtxFromCTX( inline static const MLUDeviceContext& GetDevCtxFromCTX(
const ExecutionContext& ctx) { const ExecutionContext& ctx) {
return ctx.template device_context<MLUDeviceContext>(); return ctx.template device_context<MLUDeviceContext>();
...@@ -281,6 +334,74 @@ class MLUCnnlTensorDesc { ...@@ -281,6 +334,74 @@ class MLUCnnlTensorDesc {
cnnlTensorDescriptor_t raw_tensor_desc = nullptr; cnnlTensorDescriptor_t raw_tensor_desc = nullptr;
}; };
// Move-only owner of an mluOp tensor descriptor. The constructors declared
// here are defined out of line; get() exposes the raw descriptor handle.
class MLUOpTensorDesc {
 public:
  // Creates an empty object that holds no descriptor.
  MLUOpTensorDesc() {}

  // Non-copyable: the raw descriptor has a single owner.
  MLUOpTensorDesc(const MLUOpTensorDesc& desc) = delete;
  MLUOpTensorDesc& operator=(const MLUOpTensorDesc&) = delete;

  // Takes over the descriptor of `rhs` and leaves `rhs` empty. Only pointer
  // assignments are performed, so the operation cannot throw.
  MLUOpTensorDesc(MLUOpTensorDesc&& rhs) noexcept
      : raw_tensor_desc(rhs.raw_tensor_desc) {
    rhs.raw_tensor_desc = nullptr;
  }

  MLUOpTensorDesc& operator=(MLUOpTensorDesc&& rhs);

  // Constructors taking the dimensions as an int array.
  MLUOpTensorDesc(const int tensor_dim,
                  const int dim_sizes[],
                  const mluOpDataType_t tensor_dtype);

  MLUOpTensorDesc(const int tensor_dim,
                  const int dim_sizes[],
                  const mluOpDataType_t tensor_dtype,
                  const mluOpTensorLayout_t layout);

  MLUOpTensorDesc(const int tensor_dim,
                  const int dim_sizes[],
                  const mluOpDataType_t tensor_dtype,
                  int position);

  // Constructors taking the dimensions as an int64_t array.
  MLUOpTensorDesc(const int tensor_dim,
                  const int64_t dim_sizes[],
                  const mluOpDataType_t tensor_dtype);

  MLUOpTensorDesc(const int tensor_dim,
                  const int64_t dim_sizes[],
                  const mluOpDataType_t tensor_dtype,
                  const mluOpTensorLayout_t layout);

  MLUOpTensorDesc(const int tensor_dim,
                  const int64_t dim_sizes[],
                  const mluOpDataType_t tensor_dtype,
                  int position);

  // Constructors deriving the dimensions from a Tensor.
  MLUOpTensorDesc(const Tensor& tensor,
                  const mluOpTensorLayout_t layout,
                  const mluOpDataType_t tensor_dtype);

  explicit MLUOpTensorDesc(const Tensor& tensor);

  MLUOpTensorDesc(const Tensor& tensor,
                  mluOpTensorLayout_t layout,
                  const mluOpDataType_t tensor_dtype,
                  int position);

  MLUOpTensorDesc(const Tensor& tensor,
                  mluOpTensorLayout_t layout,
                  const mluOpDataType_t tensor_dtype,
                  int position,
                  float scale);

  ~MLUOpTensorDesc();

  // Returns the raw descriptor handle (nullptr for an empty object).
  const mluOpTensorDescriptor_t get() const { return raw_tensor_desc; }

 private:
  mluOpTensorDescriptor_t raw_tensor_desc = nullptr;
};
class MLUCnnlActivationDesc { class MLUCnnlActivationDesc {
public: public:
MLUCnnlActivationDesc(const MLUCnnlActivationDesc& desc) = delete; MLUCnnlActivationDesc(const MLUCnnlActivationDesc& desc) = delete;
......
...@@ -28,11 +28,13 @@ MLUContext::MLUContext(const MLUPlace& place, const int priority) { ...@@ -28,11 +28,13 @@ MLUContext::MLUContext(const MLUPlace& place, const int priority) {
MLUDeviceGuard guard(place_.device); MLUDeviceGuard guard(place_.device);
stream_.reset(new stream::MLUStream(place_, priority)); stream_.reset(new stream::MLUStream(place_, priority));
InitCNNLContext(); InitCNNLContext();
InitMLUOPContext();
} }
MLUContext::~MLUContext() { MLUContext::~MLUContext() {
MLUDeviceGuard guard(place_.device); MLUDeviceGuard guard(place_.device);
DestoryCNNLContext(); DestoryCNNLContext();
DestoryMLUOPContext();
} }
MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) { MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) {
...@@ -41,6 +43,7 @@ MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) { ...@@ -41,6 +43,7 @@ MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) {
driver_version_ = GetMLUDriverVersion(place_.device); driver_version_ = GetMLUDriverVersion(place_.device);
runtime_version_ = GetMLURuntimeVersion(place_.device); runtime_version_ = GetMLURuntimeVersion(place_.device);
cnnl_version_ = GetMLUCnnlVersion(place_.device); cnnl_version_ = GetMLUCnnlVersion(place_.device);
mluOp_version_ = GetMLUOpVersion(place_.device);
LOG_FIRST_N(WARNING, 1) LOG_FIRST_N(WARNING, 1)
<< "Please NOTE: device: " << static_cast<int>(place_.device) << "Please NOTE: device: " << static_cast<int>(place_.device)
...@@ -51,7 +54,9 @@ MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) { ...@@ -51,7 +54,9 @@ MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) {
<< ", Runtime API Version: " << runtime_version_ / 10000 << "." << ", Runtime API Version: " << runtime_version_ / 10000 << "."
<< (runtime_version_ / 100) % 100 << "." << runtime_version_ % 100 << (runtime_version_ / 100) % 100 << "." << runtime_version_ % 100
<< ", Cnnl API Version: " << cnnl_version_ / 10000 << "." << ", Cnnl API Version: " << cnnl_version_ / 10000 << "."
<< (cnnl_version_ / 100) % 100 << "." << cnnl_version_ % 100; << (cnnl_version_ / 100) % 100 << "." << cnnl_version_ % 100
<< ", MluOp API Version: " << mluOp_version_ / 10000 << "."
<< (mluOp_version_ / 100) % 100 << "." << mluOp_version_ % 100;
default_ctx_.reset(new MLUContext(place_)); default_ctx_.reset(new MLUContext(place_));
} }
...@@ -70,6 +75,10 @@ mluCnnlHandle MLUDeviceContext::cnnl_handle() const { ...@@ -70,6 +75,10 @@ mluCnnlHandle MLUDeviceContext::cnnl_handle() const {
return context()->CnnlHandle(); return context()->CnnlHandle();
} }
// Returns the mluOp handle held by the context that context() yields.
mluOpHandle MLUDeviceContext::mluOp_handle() const {
  const auto& mlu_ctx = context();
  return mlu_ctx->MluOpHandle();
}
mluStream MLUDeviceContext::stream() const { return context()->RawStream(); } mluStream MLUDeviceContext::stream() const { return context()->RawStream(); }
#endif #endif
......
...@@ -53,12 +53,19 @@ class MLUContext { ...@@ -53,12 +53,19 @@ class MLUContext {
const mluCnnlHandle& CnnlHandle() const { return cnnl_handle_; } const mluCnnlHandle& CnnlHandle() const { return cnnl_handle_; }
// Read-only access to the mluOp handle stored in this context.
const mluOpHandle& MluOpHandle() const {
  return mluOp_handle_;
}
private: private:
void InitCNNLContext() { void InitCNNLContext() {
PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreate(&cnnl_handle_)); PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreate(&cnnl_handle_));
PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetQueue(cnnl_handle_, RawStream())); PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetQueue(cnnl_handle_, RawStream()));
} }
// Creates the mluOp handle and attaches it to this context's raw stream.
void InitMLUOPContext() {
  const auto create_status = mluOpCreate(&mluOp_handle_);
  PADDLE_ENFORCE_MLU_SUCCESS(create_status);
  const auto queue_status = mluOpSetQueue(mluOp_handle_, RawStream());
  PADDLE_ENFORCE_MLU_SUCCESS(queue_status);
}
void DestoryCNNLContext() { void DestoryCNNLContext() {
if (cnnl_handle_) { if (cnnl_handle_) {
PADDLE_ENFORCE_MLU_SUCCESS(cnnlDestroy(cnnl_handle_)); PADDLE_ENFORCE_MLU_SUCCESS(cnnlDestroy(cnnl_handle_));
...@@ -66,10 +73,18 @@ class MLUContext { ...@@ -66,10 +73,18 @@ class MLUContext {
cnnl_handle_ = nullptr; cnnl_handle_ = nullptr;
} }
// Destroys the mluOp handle when one is held, then clears the member so the
// handle cannot be destroyed twice.
void DestoryMLUOPContext() {
  if (mluOp_handle_) {
    const auto destroy_status = mluOpDestroy(mluOp_handle_);
    PADDLE_ENFORCE_MLU_SUCCESS(destroy_status);
  }
  mluOp_handle_ = nullptr;
}
MLUPlace place_; MLUPlace place_;
std::unique_ptr<Eigen::DefaultDevice> eigen_device_; std::unique_ptr<Eigen::DefaultDevice> eigen_device_;
std::unique_ptr<stream::MLUStream> stream_; std::unique_ptr<stream::MLUStream> stream_;
mluCnnlHandle cnnl_handle_; mluCnnlHandle cnnl_handle_;
mluOpHandle mluOp_handle_;
DISABLE_COPY_AND_ASSIGN(MLUContext); DISABLE_COPY_AND_ASSIGN(MLUContext);
}; };
...@@ -89,6 +104,9 @@ class MLUDeviceContext : public DeviceContext { ...@@ -89,6 +104,9 @@ class MLUDeviceContext : public DeviceContext {
/*! \brief Return cnnl handle in the device context. */ /*! \brief Return cnnl handle in the device context. */
mluCnnlHandle cnnl_handle() const; mluCnnlHandle cnnl_handle() const;
/*! \brief Return mluOp handle in the device context. */
mluOpHandle mluOp_handle() const;
/*! \brief Return mlu stream in the device context. */ /*! \brief Return mlu stream in the device context. */
mluStream stream() const; mluStream stream() const;
...@@ -135,6 +153,7 @@ class MLUDeviceContext : public DeviceContext { ...@@ -135,6 +153,7 @@ class MLUDeviceContext : public DeviceContext {
int driver_version_; int driver_version_;
int runtime_version_; int runtime_version_;
int cnnl_version_; int cnnl_version_;
int mluOp_version_;
MLUPlace place_; MLUPlace place_;
std::shared_ptr<MLUContext> default_ctx_; std::shared_ptr<MLUContext> default_ctx_;
......
...@@ -41,6 +41,7 @@ struct MLUStatusType {}; ...@@ -41,6 +41,7 @@ struct MLUStatusType {};
DEFINE_MLU_STATUS_TYPE(cnrtStatus, cnrtSuccess, CNRT); DEFINE_MLU_STATUS_TYPE(cnrtStatus, cnrtSuccess, CNRT);
DEFINE_MLU_STATUS_TYPE(cnnlStatus, CNNL_STATUS_SUCCESS, CNNL); DEFINE_MLU_STATUS_TYPE(cnnlStatus, CNNL_STATUS_SUCCESS, CNNL);
DEFINE_MLU_STATUS_TYPE(mluOpStatus, MLUOP_STATUS_SUCCESS, MLUOP);
DEFINE_MLU_STATUS_TYPE(cnStatus, CN_SUCCESS, CN); DEFINE_MLU_STATUS_TYPE(cnStatus, CN_SUCCESS, CN);
#ifdef PADDLE_WITH_CNCL #ifdef PADDLE_WITH_CNCL
DEFINE_MLU_STATUS_TYPE(cnclStatus, CNCL_RET_SUCCESS, CNCL); DEFINE_MLU_STATUS_TYPE(cnclStatus, CNCL_RET_SUCCESS, CNCL);
...@@ -68,6 +69,15 @@ inline std::string build_mlu_error_msg(cnnlStatus stat) { ...@@ -68,6 +69,15 @@ inline std::string build_mlu_error_msg(cnnlStatus stat) {
return sout.str(); return sout.str();
} }
/*************** MLU OP ERROR ***************/
// True when `stat` is anything other than MLUOP_STATUS_SUCCESS.
inline bool is_error(mluOpStatus stat) {
  const bool succeeded = (stat == MLUOP_STATUS_SUCCESS);
  return !succeeded;
}
// Formats an mluOp status as "MLU OP error(<code>), <description>. ", where
// the description comes from mluOpGetErrorString.
inline std::string build_mlu_error_msg(mluOpStatus stat) {
  std::ostringstream message;
  message << "MLU OP error(" << stat << "), ";
  message << mluOpGetErrorString(stat) << ". ";
  return message.str();
}
/*************** CN API ERROR ***************/ /*************** CN API ERROR ***************/
inline bool is_error(cnStatus stat) { return stat != CN_SUCCESS; } inline bool is_error(cnStatus stat) { return stat != CN_SUCCESS; }
......
...@@ -126,6 +126,13 @@ int GetMLUCnnlVersion(int id) { ...@@ -126,6 +126,13 @@ int GetMLUCnnlVersion(int id) {
return x * 10000 + y * 100 + z; return x * 10000 + y * 100 + z;
} }
// Returns the mluOp library version encoded as
// major * 10000 + minor * 100 + patch. CheckDeviceId(id) is called first.
int GetMLUOpVersion(int id) {
  CheckDeviceId(id);
  // Zero-initialised so the result is well defined even if the library call
  // leaves any of the outputs untouched.
  int major = 0;
  int minor = 0;
  int patch = 0;
  mluOpGetLibVersion(&major, &minor, &patch);
  return major * 10000 + minor * 100 + patch;
}
int GetMLUCurrentDeviceId() { int GetMLUCurrentDeviceId() {
int device_id; int device_id;
PADDLE_ENFORCE_MLU_SUCCESS(cnrtGetDevice(&device_id)); PADDLE_ENFORCE_MLU_SUCCESS(cnrtGetDevice(&device_id));
......
...@@ -16,10 +16,11 @@ limitations under the License. */ ...@@ -16,10 +16,11 @@ limitations under the License. */
#ifdef PADDLE_WITH_MLU #ifdef PADDLE_WITH_MLU
#include <cn_api.h> #include <cn_api.h>
#include <cndrv_id.h>
#include <cnnl.h> #include <cnnl.h>
#include <cnpapi.h> #include <cnpapi.h>
#include <cnpapi_cndrv_id.h>
#include <cnrt.h> #include <cnrt.h>
#include <mlu_op.h>
#ifdef PADDLE_WITH_CNCL #ifdef PADDLE_WITH_CNCL
#include <cncl.h> #include <cncl.h>
#endif #endif
...@@ -30,11 +31,13 @@ namespace paddle { ...@@ -30,11 +31,13 @@ namespace paddle {
using cnStatus = CNresult; using cnStatus = CNresult;
using cnrtStatus = cnrtRet_t; using cnrtStatus = cnrtRet_t;
using cnnlStatus = cnnlStatus_t; using cnnlStatus = cnnlStatus_t;
using mluOpStatus = mluOpStatus_t;
#ifdef PADDLE_WITH_CNCL #ifdef PADDLE_WITH_CNCL
using cnclStatus = cnclResult_t; using cnclStatus = cnclResult_t;
#endif #endif
using mluStream = cnrtQueue_t; using mluStream = cnrtQueue_t;
using mluCnnlHandle = cnnlHandle_t; using mluCnnlHandle = cnnlHandle_t;
using mluOpHandle = mluOpHandle_t;
using mluEventHandle = cnrtNotifier_t; using mluEventHandle = cnrtNotifier_t;
using mluDeviceHandle = CNdev; using mluDeviceHandle = CNdev;
...@@ -49,6 +52,9 @@ int GetMLURuntimeVersion(int id); ...@@ -49,6 +52,9 @@ int GetMLURuntimeVersion(int id);
//! Get the cnnl version of the ith MLU. //! Get the cnnl version of the ith MLU.
int GetMLUCnnlVersion(int id); int GetMLUCnnlVersion(int id);
//! Get the mluOp version of the ith MLU.
int GetMLUOpVersion(int id);
//! Get the total number of MLU devices in system. //! Get the total number of MLU devices in system.
int GetMLUDeviceCount(); int GetMLUDeviceCount();
......
...@@ -186,16 +186,15 @@ class TestGridSamplerOp(OpTest): ...@@ -186,16 +186,15 @@ class TestGridSamplerOp(OpTest):
self.mode = "bilinear" self.mode = "bilinear"
# TODO(fwg): Test this case when cnnl support align_corners = True. class Case1(TestGridSamplerOp):
# class Case1(TestGridSamplerOp):
# def initTestCase(self):
# def initTestCase(self): self.x_shape = (2, 3, 5, 6)
# self.x_shape = (2, 3, 5, 6) self.grid_shape = (2, 8, 9, 2)
# self.grid_shape = (2, 8, 9, 2) self.theta_shape = (2, 2, 3)
# self.theta_shape = (2, 2, 3) self.align_corners = True
# self.align_corners = True self.padding_mode = "zeros"
# self.padding_mode = "zeros" self.mode = "bilinear"
# self.mode = "bilinear"
class LargeInputCase(TestGridSamplerOp): class LargeInputCase(TestGridSamplerOp):
...@@ -209,16 +208,16 @@ class LargeInputCase(TestGridSamplerOp): ...@@ -209,16 +208,16 @@ class LargeInputCase(TestGridSamplerOp):
self.mode = "bilinear" self.mode = "bilinear"
# TODO(fwg): Test this case when cnnl support align_corners = True. class Case2(LargeInputCase):
# class Case2(LargeInputCase):
# def initTestCase(self):
# def initTestCase(self): self.x_shape = (2, 3, 128, 128)
# self.x_shape = (2, 3, 128, 128) self.grid_shape = (2, 130, 130, 2)
# self.grid_shape = (2, 130, 130, 2) self.theta_shape = (2, 2, 3)
# self.theta_shape = (2, 2, 3) self.align_corners = True
# self.align_corners = True self.padding_mode = "zeros"
# self.padding_mode = "zeros" self.mode = "bilinear"
# self.mode = "bilinear"
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -152,6 +152,11 @@ class TestKeepDim8DReduce(TestMLUReduceSumOp): ...@@ -152,6 +152,11 @@ class TestKeepDim8DReduce(TestMLUReduceSumOp):
self.axis = (3, 4, 5) self.axis = (3, 4, 5)
self.keep_dim = True self.keep_dim = True
def test_check_grad(self):
self.check_grad_with_place(self.place, ['X'],
'Out',
max_relative_error=0.03)
class TestReduceAll(TestMLUReduceSumOp): class TestReduceAll(TestMLUReduceSumOp):
......
# A image for building paddle binaries # A image for building paddle binaries
# Update CNTOOLKIT_VERSION, CNNL_VERSION and CNCL_VERSION if using other versions # Update CNTOOLKIT_VERSION, CNNL_VERSION, CNCL_VERSION and MLUOPS_VERSION if using other versions
# #
# Build: # Build:
# - CNTOOLKIT_VERSION 2.8.5 # - CNTOOLKIT_VERSION 3.0.2-1
# - CNNL_VERSION 1.10.5 # - CNNL_VERSION 1.13.0-1
# - CNCL_VERSION 1.1.2 # - CNCL_VERSION 1.2.1-1
# - MLUOPS_VERSION 0.2.0-1
# #
# Download three packages from FTP (need to connect cambricon AE to get FTP url) # Download four packages from FTP (contact Cambricon AE to get the FTP url)
# - cntoolkit_2.8.5.ubuntu18.04_amd64.deb # - cntoolkit_3.0.2-1.ubuntu18.04_amd64.deb
# - cnnl_1.10.5.ubuntu18.04_amd64.deb # - cnnl_1.13.0-1.ubuntu18.04_amd64.deb
# - cncl_1.1.2.ubuntu18.04_amd64.deb # - cncl_1.2.1-1.ubuntu18.04_amd64.deb
# - mluops_0.2.0-1.ubuntu18.04_amd64.deb
# copy them to current directory first, then run build commands # copy them to current directory first, then run build commands
# #
# For example: # For example:
...@@ -19,11 +21,13 @@ ...@@ -19,11 +21,13 @@
# (get cntoolkit pkg) # (get cntoolkit pkg)
# (get cnnl pkg) # (get cnnl pkg)
# (get cncl pkg) # (get cncl pkg)
# (get mluops pkg)
# #
# docker build -f Dockerfile.mlu \ # docker build -f Dockerfile.mlu \
# --build-arg CNTOOLKIT_VERSION=2.8.5 \ # --build-arg CNTOOLKIT_VERSION=3.0.2-1 \
# --build-arg CNNL_VERSION=1.10.5 \ # --build-arg CNNL_VERSION=1.13.0-1 \
# --build-arg CNCL_VERSION=1.1.2 \ # --build-arg CNCL_VERSION=1.2.1-1 \
# --build-arg MLUOPS_VERSION=0.2.0-1 \
# -t paddlepaddle/paddle:latest-dev-mlu . # -t paddlepaddle/paddle:latest-dev-mlu .
# #
# without mlu device: # without mlu device:
...@@ -40,12 +44,14 @@ MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com> ...@@ -40,12 +44,14 @@ MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
ENV WITH_GPU=OFF ENV WITH_GPU=OFF
ARG CNTOOLKIT_VERSION=2.8.5 ARG CNTOOLKIT_VERSION=3.0.2-1
ARG CNNL_VERSION=1.10.5 ARG CNNL_VERSION=1.13.0-1
ARG CNCL_VERSION=1.1.2 ARG CNCL_VERSION=1.2.1-1
ARG MLUOPS_VERSION=0.2.0-1
ARG CNTOOLKIT_PKG=cntoolkit_$CNTOOLKIT_VERSION.ubuntu18.04_amd64.deb ARG CNTOOLKIT_PKG=cntoolkit_$CNTOOLKIT_VERSION.ubuntu18.04_amd64.deb
ARG CNNL_PKG=cnnl_$CNNL_VERSION.ubuntu18.04_amd64.deb ARG CNNL_PKG=cnnl_$CNNL_VERSION.ubuntu18.04_amd64.deb
ARG CNCL_PKG=cncl_$CNCL_VERSION.ubuntu18.04_amd64.deb ARG CNCL_PKG=cncl_$CNCL_VERSION.ubuntu18.04_amd64.deb
ARG MLUOPS_PKG=mluops_$MLUOPS_VERSION.ubuntu18.04_amd64.deb
# install cntoolkit # install cntoolkit
COPY $CNTOOLKIT_PKG ./ COPY $CNTOOLKIT_PKG ./
...@@ -67,6 +73,11 @@ COPY $CNCL_PKG ./ ...@@ -67,6 +73,11 @@ COPY $CNCL_PKG ./
RUN dpkg -i $CNCL_PKG && \ RUN dpkg -i $CNCL_PKG && \
rm -f $CNCL_PKG rm -f $CNCL_PKG
# install mluops
COPY $MLUOPS_PKG ./
RUN dpkg -i $MLUOPS_PKG && \
rm -f $MLUOPS_PKG
# Clean # Clean
RUN apt-get clean -y RUN apt-get clean -y
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册