Unverified commit 3e1e482b authored by cifar10, committed by GitHub

[MLU] fluid: add mluop (#46429)

Parent b0ec8efb
......@@ -15,12 +15,14 @@ set(NEUWARE_LIB_DIR ${NEUWARE_HOME}/lib64)
include_directories(${NEUWARE_INCLUDE_DIR})
set(CNNL_LIB ${NEUWARE_LIB_DIR}/libcnnl.so)
set(MLUOP_LIB ${NEUWARE_LIB_DIR}/libmluops.so)
set(CNRT_LIB ${NEUWARE_LIB_DIR}/libcnrt.so)
set(CNDRV_LIB ${NEUWARE_LIB_DIR}/libcndrv.so)
set(CNPAPI_LIB ${NEUWARE_LIB_DIR}/libcnpapi.so)
generate_dummy_static_lib(LIB_NAME "neuware_lib" GENERATOR "neuware.cmake")
set(NEUWARE_LIB_DEPS ${CNNL_LIB} ${CNRT_LIB} ${CNDRV_LIB} ${CNPAPI_LIB})
set(NEUWARE_LIB_DEPS ${CNNL_LIB} ${MLUOP_LIB} ${CNRT_LIB} ${CNDRV_LIB}
${CNPAPI_LIB})
if(WITH_CNCL)
message(STATUS "Compile with CNCL!")
......
......@@ -256,6 +256,186 @@ MLUCnnlTensorDesc::~MLUCnnlTensorDesc() {
}
}
class MLUOpTensorDescPool {
public:
mluOpTensorDescriptor_t Pop() {
mluOpTensorDescriptor_t raw_desc;
if (q_.try_dequeue(raw_desc)) {
return raw_desc;
} else {
mluOpCreateTensorDescriptor(&raw_desc);
return raw_desc;
}
}
void Recycle(mluOpTensorDescriptor_t desc) {
mluOpResetTensorDescriptor(desc);
q_.enqueue(desc);
}
~MLUOpTensorDescPool() {
auto size = q_.size_approx();
if (size > 0) {
std::vector<mluOpTensorDescriptor_t> vec(size);
q_.try_dequeue_bulk(vec.data(), size);
for (auto desc : vec) {
mluOpDestroyTensorDescriptor(desc);
}
}
}
private:
moodycamel::ConcurrentQueue<mluOpTensorDescriptor_t> q_;
};
static MLUOpTensorDescPool g_mluop_tensor_desc_pool;
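The pool amortizes descriptor setup: Pop() reuses a previously recycled mluOp tensor descriptor when the queue has one and only calls mluOpCreateTensorDescriptor on a miss, while Recycle() resets the descriptor so no stale shape or dtype leaks to the next user before pushing it back. Below is a minimal single-threaded sketch of the same pop/recycle idea, assuming only the mluOp descriptor calls used above; DescPoolSketch and the plain std::deque are illustrative stand-ins for the lock-free moodycamel::ConcurrentQueue and are not part of this commit.
// Hypothetical sketch, not part of this commit: single-threaded variant of
// the descriptor pool above.
#include <deque>
#include <mlu_op.h>
class DescPoolSketch {
 public:
  mluOpTensorDescriptor_t Pop() {
    if (!free_.empty()) {  // reuse a recycled descriptor when possible
      mluOpTensorDescriptor_t d = free_.front();
      free_.pop_front();
      return d;
    }
    mluOpTensorDescriptor_t d;  // otherwise create a fresh one
    mluOpCreateTensorDescriptor(&d);
    return d;
  }
  void Recycle(mluOpTensorDescriptor_t d) {
    mluOpResetTensorDescriptor(d);  // clear shape/dtype before reuse
    free_.push_back(d);
  }
  ~DescPoolSketch() {
    for (auto d : free_) mluOpDestroyTensorDescriptor(d);
  }
 private:
  std::deque<mluOpTensorDescriptor_t> free_;  // no concurrency, unlike the real pool
};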
MLUOpTensorDesc& MLUOpTensorDesc::operator=(MLUOpTensorDesc&& rhs) {
if (raw_tensor_desc) {
g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
}
raw_tensor_desc = rhs.raw_tensor_desc;
rhs.raw_tensor_desc = nullptr;
return *this;
}
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
const int dim_sizes[],
const mluOpDataType_t tensor_dtype) {
raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc,
MLUOP_LAYOUT_ARRAY,
tensor_dtype,
tensor_dim,
dim_sizes));
}
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
const int dim_sizes[],
const mluOpDataType_t tensor_dtype,
const mluOpTensorLayout_t layout) {
raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(
raw_tensor_desc, layout, tensor_dtype, tensor_dim, dim_sizes));
}
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
const int dim_sizes[],
const mluOpDataType_t tensor_dtype,
int position)
: MLUOpTensorDesc(tensor_dim, dim_sizes, tensor_dtype) {
PADDLE_ENFORCE_MLU_SUCCESS(
mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
}
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
const int64_t dim_sizes[],
const mluOpDataType_t tensor_dtype) {
std::vector<int> dim_sizes_int32(tensor_dim);
std::vector<int64_t>::const_iterator int64_cbegin(dim_sizes);
std::vector<int64_t>::const_iterator int64_cend(dim_sizes + tensor_dim);
std::transform(int64_cbegin,
int64_cend,
dim_sizes_int32.begin(),
&CheckedNarrowing<int64_t, int>);
raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc,
MLUOP_LAYOUT_ARRAY,
tensor_dtype,
tensor_dim,
dim_sizes_int32.data()));
}
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
const int64_t dim_sizes[],
const mluOpDataType_t tensor_dtype,
const mluOpTensorLayout_t layout) {
std::vector<int> dim_sizes_int32(tensor_dim);
std::vector<int64_t>::const_iterator int64_cbegin(dim_sizes);
std::vector<int64_t>::const_iterator int64_cend(dim_sizes + tensor_dim);
std::transform(int64_cbegin,
int64_cend,
dim_sizes_int32.begin(),
&CheckedNarrowing<int64_t, int>);
raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc,
layout,
tensor_dtype,
tensor_dim,
dim_sizes_int32.data()));
}
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
const int64_t dim_sizes[],
const mluOpDataType_t tensor_dtype,
int position) {
std::vector<int> dim_sizes_int32(tensor_dim);
std::vector<int64_t>::const_iterator int64_cbegin(dim_sizes);
std::vector<int64_t>::const_iterator int64_cend(dim_sizes + tensor_dim);
std::transform(int64_cbegin,
int64_cend,
dim_sizes_int32.begin(),
&CheckedNarrowing<int64_t, int>);
raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc,
MLUOP_LAYOUT_ARRAY,
tensor_dtype,
tensor_dim,
dim_sizes_int32.data()));
PADDLE_ENFORCE_MLU_SUCCESS(
mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
}
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
const mluOpTensorLayout_t layout,
const mluOpDataType_t tensor_dtype) {
auto dims = phi::vectorize<int>(tensor.dims());
int tensor_dim = dims.size();
raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
if (tensor_dim == 0) {
int scalar_dims[1] = {1};
PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(
raw_tensor_desc, layout, tensor_dtype, 1, scalar_dims));
} else {
std::vector<int> tensor_dim_sizes_int(dims.begin(), dims.end());
PADDLE_ENFORCE_MLU_SUCCESS(
mluOpSetTensorDescriptor(raw_tensor_desc,
layout,
tensor_dtype,
tensor_dim,
tensor_dim_sizes_int.data()));
}
}
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor)
: MLUOpTensorDesc(
tensor, MLUOP_LAYOUT_ARRAY, ToMluOpDataType(tensor.dtype())) {}
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
mluOpTensorLayout_t layout,
const mluOpDataType_t tensor_dtype,
int position)
: MLUOpTensorDesc(tensor, layout, tensor_dtype) {
PADDLE_ENFORCE_MLU_SUCCESS(
mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
}
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
mluOpTensorLayout_t layout,
const mluOpDataType_t tensor_dtype,
int position,
float scale)
: MLUOpTensorDesc(tensor, layout, tensor_dtype) {
PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptorPositionAndScale(
raw_tensor_desc, position, scale));
}
MLUOpTensorDesc::~MLUOpTensorDesc() {
if (raw_tensor_desc) {
g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
}
}
MLUCnnlActivationDesc::MLUCnnlActivationDesc(
const cnnlActivationMode_t act_mode, const float ceof) {
PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreateActivationDescriptor(&active_desc_));
......@@ -1563,13 +1743,31 @@ MLURNNDesc::~MLURNNDesc() {
void* indices_out) {
cnnlHandle_t handle = GetHandleFromCTX(ctx);
PADDLE_ENFORCE_MLU_SUCCESS(cnnlTopKTensor(handle,
size_t workspace_size;
PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetTopKTensorWorkspaceSize(handle,
input_desc,
k,
dim,
largest,
values_output_desc,
indices_output_desc,
&workspace_size));
auto& dev_ctx = GetDevCtxFromCTX(ctx);
Tensor workspace = ctx.AllocateTmpTensor<int8_t, MLUDeviceContext>(
{static_cast<int64_t>(workspace_size)}, dev_ctx);
void* workspace_ptr = workspace.mutable_data(ctx.GetPlace());
PADDLE_ENFORCE_MLU_SUCCESS(cnnlTopKTensor_v3(handle,
input_desc,
input,
k,
dim,
largest,
sorted,
false /*lower_index_first*/,
workspace_ptr,
workspace_size,
values_output_desc,
values_out,
indices_output_desc,
......
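The hunk above moves TopK onto the workspace-aware path: the kernel first asks cnnlGetTopKTensorWorkspaceSize how many scratch bytes it needs, materializes a temporary int8 tensor of that size via AllocateTmpTensor, and then hands the buffer pointer and size to cnnlTopKTensor_v3 alongside the original tensor arguments. A small sketch of the query half follows, assuming the obvious int/bool parameter types; QueryTopKWorkspaceBytes is an illustrative name, not part of this commit.
// Hypothetical helper, not part of this commit: wraps only the workspace
// query shown above. The real kernel checks the returned status with
// PADDLE_ENFORCE_MLU_SUCCESS and allocates the buffer as a temporary tensor
// on the MLU device context.
#include <cnnl.h>
#include <cstddef>
inline size_t QueryTopKWorkspaceBytes(cnnlHandle_t handle,
                                      cnnlTensorDescriptor_t input_desc,
                                      int k,
                                      int dim,
                                      bool largest,
                                      cnnlTensorDescriptor_t values_desc,
                                      cnnlTensorDescriptor_t indices_desc) {
  size_t bytes = 0;
  cnnlGetTopKTensorWorkspaceSize(
      handle, input_desc, k, dim, largest, values_desc, indices_desc, &bytes);
  return bytes;  // pass this together with the buffer to cnnlTopKTensor_v3
}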
......@@ -16,6 +16,7 @@ limitations under the License. */
#include <cn_api.h>
#include <cnnl.h>
#include <concurrentqueue.h>
#include <mlu_op.h>
#include <string>
#include <vector>
......@@ -138,6 +139,54 @@ inline cnnlDataType_t ToCnnlDataType() {
return ToCnnlDataType(type);
}
inline mluOpDataType_t ToMluOpDataType(
const paddle::experimental::DataType& dtype) {
mluOpDataType_t type = MLUOP_DTYPE_FLOAT;
switch (dtype) {
case DataType::FLOAT16:
type = MLUOP_DTYPE_HALF;
break;
case DataType::FLOAT32:
type = MLUOP_DTYPE_FLOAT;
break;
case DataType::FLOAT64:
type = MLUOP_DTYPE_DOUBLE;
break;
case DataType::INT8:
type = MLUOP_DTYPE_INT8;
break;
case DataType::INT16:
type = MLUOP_DTYPE_INT16;
break;
case DataType::INT32:
type = MLUOP_DTYPE_INT32;
break;
case DataType::INT64:
type = MLUOP_DTYPE_INT64;
break;
case DataType::BOOL:
type = MLUOP_DTYPE_BOOL;
break;
case DataType::UINT8:
type = MLUOP_DTYPE_UINT8;
break;
default:
break;
}
return type;
}
inline mluOpDataType_t ToMluOpDataType(
const paddle::framework::proto::VarType::Type& type) {
return ToMluOpDataType(framework::TransToPhiDataType(type));
}
template <typename T>
inline mluOpDataType_t ToMluOpDataType() {
auto type = framework::ToDataType(std::type_index(typeid(T)));
return ToMluOpDataType(type);
}
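The three overloads cover the call sites in fluid: a phi DataType goes straight through the switch, a legacy proto VarType is first converted with TransToPhiDataType, and the templated form resolves a C++ element type at the call site; anything the switch does not list falls back silently to MLUOP_DTYPE_FLOAT. A tiny consistency sketch, assuming nothing beyond the overloads above (the function name is illustrative, not part of this commit):
// Hypothetical sketch, not part of this commit: the runtime and templated
// overloads should agree for element types they both cover.
inline bool MluOpDtypeOverloadsAgreeForFloat() {
  return ToMluOpDataType(DataType::FLOAT32) == ToMluOpDataType<float>();
}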
// Converts (via narrowing) a type T value to a type U, and checks that the
// value has no value change due to the conversion.
template <typename WideT, typename NarrowT>
......@@ -152,6 +201,10 @@ inline static cnnlHandle_t GetHandleFromCTX(const ExecutionContext& ctx) {
return ctx.template device_context<MLUDeviceContext>().cnnl_handle();
}
inline static mluOpHandle_t GetMLUOpHandleFromCTX(const ExecutionContext& ctx) {
return ctx.template device_context<MLUDeviceContext>().mluOp_handle();
}
inline static const MLUDeviceContext& GetDevCtxFromCTX(
const ExecutionContext& ctx) {
return ctx.template device_context<MLUDeviceContext>();
......@@ -281,6 +334,74 @@ class MLUCnnlTensorDesc {
cnnlTensorDescriptor_t raw_tensor_desc = nullptr;
};
class MLUOpTensorDesc {
public:
MLUOpTensorDesc() {}
// SE_DISALLOW_COPY_AND_ASSIGN
MLUOpTensorDesc(const MLUOpTensorDesc& desc) = delete;
MLUOpTensorDesc& operator=(const MLUOpTensorDesc&) = delete;
MLUOpTensorDesc(MLUOpTensorDesc&& rhs)
: raw_tensor_desc(rhs.raw_tensor_desc) {
rhs.raw_tensor_desc = nullptr;
}
MLUOpTensorDesc& operator=(MLUOpTensorDesc&& rhs);
MLUOpTensorDesc(const int tensor_dim,
const int dim_sizes[],
const mluOpDataType_t tensor_dtype);
MLUOpTensorDesc(const int tensor_dim,
const int dim_sizes[],
const mluOpDataType_t tensor_dtype,
const mluOpTensorLayout_t layout);
MLUOpTensorDesc(const int tensor_dim,
const int dim_sizes[],
const mluOpDataType_t tensor_dtype,
int position);
MLUOpTensorDesc(const int tensor_dim,
const int64_t dim_sizes[],
const mluOpDataType_t tensor_dtype);
MLUOpTensorDesc(const int tensor_dim,
const int64_t dim_sizes[],
const mluOpDataType_t tensor_dtype,
const mluOpTensorLayout_t layout);
MLUOpTensorDesc(const int tensor_dim,
const int64_t dim_sizes[],
const mluOpDataType_t tensor_dtype,
int position);
MLUOpTensorDesc(const Tensor& tensor,
const mluOpTensorLayout_t layout,
const mluOpDataType_t tensor_dtype);
explicit MLUOpTensorDesc(const Tensor& tensor);
MLUOpTensorDesc(const Tensor& tensor,
mluOpTensorLayout_t layout,
const mluOpDataType_t tensor_dtype,
int position);
MLUOpTensorDesc(const Tensor& tensor,
mluOpTensorLayout_t layout,
const mluOpDataType_t tensor_dtype,
int position,
float scale);
~MLUOpTensorDesc();
const mluOpTensorDescriptor_t get() const { return raw_tensor_desc; }
private:
mluOpTensorDescriptor_t raw_tensor_desc = nullptr;
};
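MLUOpTensorDesc mirrors MLUCnnlTensorDesc: every constructor ends in mluOpSetTensorDescriptor, get() exposes the raw handle for kernel calls, and the destructor hands the descriptor back to the pool instead of destroying it. A minimal usage sketch, assuming only the constructors declared above; the function name and shape are illustrative, not part of this commit.
// Hypothetical usage sketch, not part of this commit: describe a 2 x 3 float
// tensor with the int-dims constructor (which applies MLUOP_LAYOUT_ARRAY),
// then read back the raw descriptor for an mluOp kernel call.
inline void DescribeSmallMatrixSketch() {
  const int dims[2] = {2, 3};
  MLUOpTensorDesc desc(2, dims, MLUOP_DTYPE_FLOAT);
  mluOpTensorDescriptor_t raw = desc.get();
  (void)raw;  // descriptor is recycled into the pool when desc goes out of scope
}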
class MLUCnnlActivationDesc {
public:
MLUCnnlActivationDesc(const MLUCnnlActivationDesc& desc) = delete;
......
......@@ -28,11 +28,13 @@ MLUContext::MLUContext(const MLUPlace& place, const int priority) {
MLUDeviceGuard guard(place_.device);
stream_.reset(new stream::MLUStream(place_, priority));
InitCNNLContext();
InitMLUOPContext();
}
MLUContext::~MLUContext() {
MLUDeviceGuard guard(place_.device);
DestoryCNNLContext();
DestoryMLUOPContext();
}
MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) {
......@@ -41,6 +43,7 @@ MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) {
driver_version_ = GetMLUDriverVersion(place_.device);
runtime_version_ = GetMLURuntimeVersion(place_.device);
cnnl_version_ = GetMLUCnnlVersion(place_.device);
mluOp_version_ = GetMLUOpVersion(place_.device);
LOG_FIRST_N(WARNING, 1)
<< "Please NOTE: device: " << static_cast<int>(place_.device)
......@@ -51,7 +54,9 @@ MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) {
<< ", Runtime API Version: " << runtime_version_ / 10000 << "."
<< (runtime_version_ / 100) % 100 << "." << runtime_version_ % 100
<< ", Cnnl API Version: " << cnnl_version_ / 10000 << "."
<< (cnnl_version_ / 100) % 100 << "." << cnnl_version_ % 100;
<< (cnnl_version_ / 100) % 100 << "." << cnnl_version_ % 100
<< ", MluOp API Version: " << mluOp_version_ / 10000 << "."
<< (mluOp_version_ / 100) % 100 << "." << mluOp_version_ % 100;
default_ctx_.reset(new MLUContext(place_));
}
......@@ -70,6 +75,10 @@ mluCnnlHandle MLUDeviceContext::cnnl_handle() const {
return context()->CnnlHandle();
}
mluOpHandle MLUDeviceContext::mluOp_handle() const {
return context()->MluOpHandle();
}
mluStream MLUDeviceContext::stream() const { return context()->RawStream(); }
#endif
......
......@@ -53,12 +53,19 @@ class MLUContext {
const mluCnnlHandle& CnnlHandle() const { return cnnl_handle_; }
const mluOpHandle& MluOpHandle() const { return mluOp_handle_; }
private:
void InitCNNLContext() {
PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreate(&cnnl_handle_));
PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetQueue(cnnl_handle_, RawStream()));
}
void InitMLUOPContext() {
PADDLE_ENFORCE_MLU_SUCCESS(mluOpCreate(&mluOp_handle_));
PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetQueue(mluOp_handle_, RawStream()));
}
void DestoryCNNLContext() {
if (cnnl_handle_) {
PADDLE_ENFORCE_MLU_SUCCESS(cnnlDestroy(cnnl_handle_));
......@@ -66,10 +73,18 @@ class MLUContext {
cnnl_handle_ = nullptr;
}
void DestoryMLUOPContext() {
if (mluOp_handle_) {
PADDLE_ENFORCE_MLU_SUCCESS(mluOpDestroy(mluOp_handle_));
}
mluOp_handle_ = nullptr;
}
MLUPlace place_;
std::unique_ptr<Eigen::DefaultDevice> eigen_device_;
std::unique_ptr<stream::MLUStream> stream_;
mluCnnlHandle cnnl_handle_;
mluOpHandle mluOp_handle_;
DISABLE_COPY_AND_ASSIGN(MLUContext);
};
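InitMLUOPContext follows the same shape as InitCNNLContext: create the library handle, then bind it to the context's CNRT queue so mluOp kernels run on the same stream as the cnnl ones; DestoryMLUOPContext tears the handle down when the context is destroyed. Below is a standalone RAII sketch of that lifecycle, assuming a valid cnrtQueue_t obtained elsewhere; MluOpHandleGuard is an illustrative name, not part of this commit, and the real code wraps each call in PADDLE_ENFORCE_MLU_SUCCESS.
// Hypothetical sketch, not part of this commit: create / bind-to-queue /
// destroy lifecycle for an mluOp handle, without Paddle's error macros.
#include <cnrt.h>
#include <mlu_op.h>
class MluOpHandleGuard {
 public:
  explicit MluOpHandleGuard(cnrtQueue_t queue) {
    mluOpCreate(&handle_);          // create the mluOp library handle
    mluOpSetQueue(handle_, queue);  // run mluOp kernels on this queue
  }
  ~MluOpHandleGuard() {
    if (handle_) mluOpDestroy(handle_);
  }
  mluOpHandle_t get() const { return handle_; }
 private:
  mluOpHandle_t handle_ = nullptr;
};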
......@@ -89,6 +104,9 @@ class MLUDeviceContext : public DeviceContext {
/*! \brief Return cnnl handle in the device context. */
mluCnnlHandle cnnl_handle() const;
/*! \brief Return mluOp handle in the device context. */
mluOpHandle mluOp_handle() const;
/*! \brief Return mlu stream in the device context. */
mluStream stream() const;
......@@ -135,6 +153,7 @@ class MLUDeviceContext : public DeviceContext {
int driver_version_;
int runtime_version_;
int cnnl_version_;
int mluOp_version_;
MLUPlace place_;
std::shared_ptr<MLUContext> default_ctx_;
......
......@@ -41,6 +41,7 @@ struct MLUStatusType {};
DEFINE_MLU_STATUS_TYPE(cnrtStatus, cnrtSuccess, CNRT);
DEFINE_MLU_STATUS_TYPE(cnnlStatus, CNNL_STATUS_SUCCESS, CNNL);
DEFINE_MLU_STATUS_TYPE(mluOpStatus, MLUOP_STATUS_SUCCESS, MLUOP);
DEFINE_MLU_STATUS_TYPE(cnStatus, CN_SUCCESS, CN);
#ifdef PADDLE_WITH_CNCL
DEFINE_MLU_STATUS_TYPE(cnclStatus, CNCL_RET_SUCCESS, CNCL);
......@@ -68,6 +69,15 @@ inline std::string build_mlu_error_msg(cnnlStatus stat) {
return sout.str();
}
/*************** MLU OP ERROR ***************/
inline bool is_error(mluOpStatus stat) { return stat != MLUOP_STATUS_SUCCESS; }
inline std::string build_mlu_error_msg(mluOpStatus stat) {
std::ostringstream sout;
sout << "MLU OP error(" << stat << "), " << mluOpGetErrorString(stat) << ". ";
return sout.str();
}
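These two helpers plug mluOpStatus into the shared MLU enforce machinery: is_error() decides whether PADDLE_ENFORCE_MLU_SUCCESS should fire, and build_mlu_error_msg() renders the status code together with mluOpGetErrorString. A small usage sketch, assuming only the two functions above (the wrapper name is illustrative, not part of this commit):
// Hypothetical usage sketch, not part of this commit: format an mluOp status
// without going through PADDLE_ENFORCE_MLU_SUCCESS.
inline std::string DescribeMluOpStatus(mluOpStatus stat) {
  return is_error(stat) ? build_mlu_error_msg(stat) : std::string("success");
}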
/*************** CN API ERROR ***************/
inline bool is_error(cnStatus stat) { return stat != CN_SUCCESS; }
......
......@@ -126,6 +126,13 @@ int GetMLUCnnlVersion(int id) {
return x * 10000 + y * 100 + z;
}
int GetMLUOpVersion(int id) {
CheckDeviceId(id);
int x, y, z;
mluOpGetLibVersion(&x, &y, &z);
return x * 10000 + y * 100 + z;
}
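Like the cnnl helper above it, GetMLUOpVersion packs major/minor/patch into one int as x * 10000 + y * 100 + z; the device context later unpacks it for the startup log as v / 10000, (v / 100) % 100 and v % 100, so mluops 0.2.0 is stored as 200. A small decode sketch mirroring that arithmetic; the function name is illustrative, not part of this commit.
// Hypothetical sketch, not part of this commit: invert the packing used by
// GetMLUCnnlVersion / GetMLUOpVersion above.
inline void UnpackMluLibVersion(int packed, int* major, int* minor, int* patch) {
  *major = packed / 10000;        // 200 -> 0
  *minor = (packed / 100) % 100;  // 200 -> 2
  *patch = packed % 100;          // 200 -> 0
}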
int GetMLUCurrentDeviceId() {
int device_id;
PADDLE_ENFORCE_MLU_SUCCESS(cnrtGetDevice(&device_id));
......
......@@ -16,10 +16,11 @@ limitations under the License. */
#ifdef PADDLE_WITH_MLU
#include <cn_api.h>
#include <cndrv_id.h>
#include <cnnl.h>
#include <cnpapi.h>
#include <cnpapi_cndrv_id.h>
#include <cnrt.h>
#include <mlu_op.h>
#ifdef PADDLE_WITH_CNCL
#include <cncl.h>
#endif
......@@ -30,11 +31,13 @@ namespace paddle {
using cnStatus = CNresult;
using cnrtStatus = cnrtRet_t;
using cnnlStatus = cnnlStatus_t;
using mluOpStatus = mluOpStatus_t;
#ifdef PADDLE_WITH_CNCL
using cnclStatus = cnclResult_t;
#endif
using mluStream = cnrtQueue_t;
using mluCnnlHandle = cnnlHandle_t;
using mluOpHandle = mluOpHandle_t;
using mluEventHandle = cnrtNotifier_t;
using mluDeviceHandle = CNdev;
......@@ -49,6 +52,9 @@ int GetMLURuntimeVersion(int id);
//! Get the cnnl version of the ith MLU.
int GetMLUCnnlVersion(int id);
//! Get the mluOp version of the ith MLU.
int GetMLUOpVersion(int id);
//! Get the total number of MLU devices in system.
int GetMLUDeviceCount();
......
......@@ -186,16 +186,15 @@ class TestGridSamplerOp(OpTest):
self.mode = "bilinear"
# TODO(fwg): Test this case when cnnl support align_corners = True.
# class Case1(TestGridSamplerOp):
#
# def initTestCase(self):
# self.x_shape = (2, 3, 5, 6)
# self.grid_shape = (2, 8, 9, 2)
# self.theta_shape = (2, 2, 3)
# self.align_corners = True
# self.padding_mode = "zeros"
# self.mode = "bilinear"
class Case1(TestGridSamplerOp):
def initTestCase(self):
self.x_shape = (2, 3, 5, 6)
self.grid_shape = (2, 8, 9, 2)
self.theta_shape = (2, 2, 3)
self.align_corners = True
self.padding_mode = "zeros"
self.mode = "bilinear"
class LargeInputCase(TestGridSamplerOp):
......@@ -209,16 +208,16 @@ class LargeInputCase(TestGridSamplerOp):
self.mode = "bilinear"
# TODO(fwg): Test this case when cnnl support align_corners = True.
# class Case2(LargeInputCase):
#
# def initTestCase(self):
# self.x_shape = (2, 3, 128, 128)
# self.grid_shape = (2, 130, 130, 2)
# self.theta_shape = (2, 2, 3)
# self.align_corners = True
# self.padding_mode = "zeros"
# self.mode = "bilinear"
class Case2(LargeInputCase):
def initTestCase(self):
self.x_shape = (2, 3, 128, 128)
self.grid_shape = (2, 130, 130, 2)
self.theta_shape = (2, 2, 3)
self.align_corners = True
self.padding_mode = "zeros"
self.mode = "bilinear"
if __name__ == "__main__":
unittest.main()
......@@ -152,6 +152,11 @@ class TestKeepDim8DReduce(TestMLUReduceSumOp):
self.axis = (3, 4, 5)
self.keep_dim = True
def test_check_grad(self):
self.check_grad_with_place(self.place, ['X'],
'Out',
max_relative_error=0.03)
class TestReduceAll(TestMLUReduceSumOp):
......
# An image for building paddle binaries
# Update CNTOOLKIT_VERSION, CNNL_VERSION and CNCL_VERSION if using other versions
# Update CNTOOLKIT_VERSION, CNNL_VERSION, CNCL_VERSION and MLUOPS_VERSION if using other versions
#
# Build:
# - CNTOOLKIT_VERSION 2.8.5
# - CNNL_VERSION 1.10.5
# - CNCL_VERSION 1.1.2
# - CNTOOLKIT_VERSION 3.0.2-1
# - CNNL_VERSION 1.13.0-1
# - CNCL_VERSION 1.2.1-1
# - MLUOPS_VERSION 0.2.0-1
#
# Download the four packages from FTP (contact Cambricon AE to get the FTP URL)
# - cntoolkit_2.8.5.ubuntu18.04_amd64.deb
# - cnnl_1.10.5.ubuntu18.04_amd64.deb
# - cncl_1.1.2.ubuntu18.04_amd64.deb
# - cntoolkit_3.0.2-1.ubuntu18.04_amd64.deb
# - cnnl_1.13.0-1.ubuntu18.04_amd64.deb
# - cncl_1.2.1-1.ubuntu18.04_amd64.deb
# - mluops_0.2.0-1.ubuntu18.04_amd64.deb
# copy them to the current directory first, then run the build commands
#
# For example:
......@@ -19,11 +21,13 @@
# (get cntoolkit pkg)
# (get cnnl pkg)
# (get cncl pkg)
# (get mluops pkg)
#
# docker build -f Dockerfile.mlu \
# --build-arg CNTOOLKIT_VERSION=2.8.5 \
# --build-arg CNNL_VERSION=1.10.5 \
# --build-arg CNCL_VERSION=1.1.2 \
# --build-arg CNTOOLKIT_VERSION=3.0.2-1 \
# --build-arg CNNL_VERSION=1.13.0-1 \
# --build-arg CNCL_VERSION=1.2.1-1 \
# --build-arg MLUOPS_VERSION=0.2.0-1 \
# -t paddlepaddle/paddle:latest-dev-mlu .
#
# without mlu device:
......@@ -40,12 +44,14 @@ MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
ENV WITH_GPU=OFF
ARG CNTOOLKIT_VERSION=2.8.5
ARG CNNL_VERSION=1.10.5
ARG CNCL_VERSION=1.1.2
ARG CNTOOLKIT_VERSION=3.0.2-1
ARG CNNL_VERSION=1.13.0-1
ARG CNCL_VERSION=1.2.1-1
ARG MLUOPS_VERSION=0.2.0-1
ARG CNTOOLKIT_PKG=cntoolkit_$CNTOOLKIT_VERSION.ubuntu18.04_amd64.deb
ARG CNNL_PKG=cnnl_$CNNL_VERSION.ubuntu18.04_amd64.deb
ARG CNCL_PKG=cncl_$CNCL_VERSION.ubuntu18.04_amd64.deb
ARG MLUOPS_PKG=mluops_$MLUOPS_VERSION.ubuntu18.04_amd64.deb
# install cntoolkit
COPY $CNTOOLKIT_PKG ./
......@@ -67,6 +73,11 @@ COPY $CNCL_PKG ./
RUN dpkg -i $CNCL_PKG && \
rm -f $CNCL_PKG
# install mluops
COPY $MLUOPS_PKG ./
RUN dpkg -i $MLUOPS_PKG && \
rm -f $MLUOPS_PKG
# Clean
RUN apt-get clean -y
......