未验证 提交 c1837d76 编写于 作者: H hong19860320 提交者: GitHub

[LITE][NPU] Upgrade HiAI DDK from 300 to 310 (#2423)

上级 94731268
......@@ -31,117 +31,6 @@
// Extended Ops of HIAI DDK
namespace ge {
/**
* MatMul: multiplies the matrix x by the matrix w to generate x * w, with an
* optional bias input.
* The inputs must be two-dimensional matrices and the inner dimension of the
* first input must match the outer dimension of the second input.
* NOTE(review): the previous wording referred to inputs "x1"/"x2" and to
* transpose_x1/transpose_x2 flags; this registration declares inputs x/w and
* no transpose attribute, so transposition does not appear to be configurable
* here - confirm against the DDK headers.
* <Input>
* x : the first input tensor (DT_FLOAT), must be non const op.
* w : the second input tensor (DT_FLOAT), must be const op.
* bias: the optional bias tensor (DT_FLOAT), must be const op.
* <Output>
* y : the output tensor (DT_FLOAT).
* <Attr>
* has_bias: If true, enable input bias. Defaults to false.
*/
REG_OP(MatMul)
.INPUT(x, TensorType({DT_FLOAT}))
.INPUT(w, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT})) // bias must be a const input
.OUTPUT(y, TensorType({DT_FLOAT}))
.ATTR(has_bias, AttrValue::BOOL{false}) // set to true when the bias input is provided
.OP_END();
/**
* Deconvolution: computes the gradients of convolution with respect to the
* input.
* <Input>
* input_sizes : An integer vector representing the shape of input,
* where input is a 4-D [batch, height, width, channels] tensor.
* Registered as DT_UINT8.
* filter : the filter tensor, with shape [H , W, filter_channel,
* filter_number], filter_channel must be same as x channel.
* x : The input tensor.
* b : Optional input, not described by the original documentation;
* presumably the bias tensor - TODO confirm.
* <Output>
* y : The output tensor.
* <Attr>
* mode : Integer attribute, defaults to 1. Its meaning is not documented
* here - TODO confirm against the DDK headers.
* format: 0: NCHW. 1: NHWC. Defaults to 1.
* group : 1: default
* num_output : 0: default, num_output must be equal to
* (filter_channel * group)
* pad : Padding for the beginning and ending along each axis.
* Defaults to {0, 0, 0, 0}.
* stride : Stride along each axis. Defaults to {1, 1}.
* dilation : dilation value along each axis of the filter.
* Defaults to {1, 1}.
* pad_mode : 0:NOTSET, 5:VALID 6:SAME. The default value is 0:NOTSET
* bias_term : 0: default
* kernel : The shape of the convolution kernel. Defaults to {0, 0}.
*/
REG_OP(Deconvolution)
.INPUT(input_sizes, TensorType({DT_UINT8}))
.INPUT(filter, TensorType({DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT}))
.ATTR(mode, AttrValue::INT{1})
.ATTR(format, AttrValue::INT{1})
.ATTR(group, AttrValue::INT{1})
.ATTR(num_output, AttrValue::INT{0})
.ATTR(pad, AttrValue::LIST_INT({0, 0, 0, 0}))
.ATTR(stride, AttrValue::LIST_INT({1, 1}))
.ATTR(dilation, AttrValue::LIST_INT({1, 1}))
.ATTR(pad_mode, AttrValue::INT{0})
.ATTR(bias_term, AttrValue::INT{0})
.ATTR(kernel, AttrValue::LIST_INT({0, 0}))
.OP_END();
/**
* ResizeBilinear: resize images to size using bilinear interpolation.
* <Input>
* x : The tensor of 4-D
* w : A int32 Tensor of 2 elements: [height, width].
* NOTE(review): the registration below also accepts DT_FLOAT for w.
* <Output>
* y : the output tensor
* <Attr>
* align_corners : If true, the centers of the 4 corner pixels of the
* input and output tensors are aligned, preserving the values at the corner
* pixels. Defaults to false.
* output_dim_mode : Defaults to 2. One of 0: zoom_factor, 1:
* shrink_factor, 2: height/width. When output_dim_mode=2, the output
* dimensions are controlled by the [height, width] of w.
* shrink_factor : shrink factor. Defaults to 1.
* zoom_factor : zoom factor. Defaults to 1.
* pad_begin : begin of pad. Defaults to 0.
* pad_end : end of pad. Defaults to 0.
*/
REG_OP(ResizeBilinear)
.INPUT(x, TensorType({DT_FLOAT, DT_INT32}))
.INPUT(w, TensorType({DT_FLOAT, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_INT32}))
.ATTR(align_corners, AttrValue::BOOL{false})
.ATTR(output_dim_mode, AttrValue::INT{2})
.ATTR(shrink_factor, AttrValue::INT{1})
.ATTR(zoom_factor, AttrValue::INT{1})
.ATTR(pad_begin, AttrValue::INT{0})
.ATTR(pad_end, AttrValue::INT{0})
.OP_END();
/**
* ResizeNearestNeighbor: resize images to size using nearest neighbor
* interpolation.
* <Input>
* image : The input tensor to be resized
* (DT_FLOAT, DT_INT32, DT_UINT8 or DT_BOOL).
* size : Must be one dimension and two elements (DT_INT32);
* presumably [height, width] - TODO confirm.
* <Output>
* output : the output tensor, registered with the same set of data types as
* image.
* <Attr>
* align_corners : If true, the centers of the 4 corner pixels of the
* input and output tensors are aligned, preserving the values at the corner
* pixels. Defaults to false
*/
REG_OP(ResizeNearestNeighbor)
.INPUT(image, TensorType({DT_FLOAT, DT_INT32, DT_UINT8, DT_BOOL}))
.INPUT(size, TensorType({DT_INT32}))
.OUTPUT(output, TensorType({DT_FLOAT, DT_INT32, DT_UINT8, DT_BOOL}))
.ATTR(align_corners, AttrValue::BOOL{false})
.OP_END();
/**
* Pads a tensor.
* <Input>
......
......@@ -82,7 +82,6 @@ node_map_type ConvTransposeConverter(
lite::npu::OpList::Global().add(inputs_map.at(input_var_name));
// set attributes
conv_transpose_node->set_attr_mode(1);
conv_transpose_node->set_attr_format(0); // NCHW
conv_transpose_node->set_attr_pad_mode(0); // NOTSET
conv_transpose_node->set_attr_group(groups);
......
......@@ -45,6 +45,7 @@ node_map_type InterpolateConverter(
auto out_h = op_info->GetAttr<int>("out_h");
auto align_corners = op_info->GetAttr<bool>("align_corners");
int align_mode = op_info->GetAttr<int>("align_mode");
auto interp_method = op_info->GetAttr<std::string>("interp_method");
CHECK(!(align_mode == 0 && !align_corners)) << "[NPU] align_mode = 0 && "
"align_corners = false isn't "
"supported in HiAI DDK";
......@@ -58,11 +59,11 @@ node_map_type InterpolateConverter(
}
// update out_h and out_w if has OutSize
bool inputs_map_has_w = false;
std::shared_ptr<ge::Operator> out_size_node = nullptr;
if (lite::npu::HasInputArg(op_info, scope, "OutSize")) {
auto out_size_var_name = op_info->Input("OutSize").front();
if (inputs_map.count(out_size_var_name)) {
inputs_map_has_w = true;
out_size_node = inputs_map.at(out_size_var_name);
} else {
auto out_size =
scope->FindVar(out_size_var_name)->GetMutable<lite::Tensor>();
......@@ -73,58 +74,45 @@ node_map_type InterpolateConverter(
out_w = out_size_data[1];
}
}
node_map_type outputs_map;
auto interp_method = op_info->GetAttr<std::string>("interp_method");
if (interp_method == "bilinear") {
auto interp_node = std::make_shared<ge::op::ResizeBilinear>(unique_op_type);
lite::npu::OpList::Global().add(interp_node);
interp_node->set_input_x(*inputs_map.at(x_var_name));
if (inputs_map_has_w) {
auto out_size_var_name = op_info->Input("OutSize").front();
interp_node->set_input_w(*inputs_map.at(out_size_var_name));
lite::npu::OpList::Global().add(inputs_map.at(out_size_var_name));
} else {
if (out_size_node == nullptr) {
if (interp_method == "bilinear") {
const float largest_multiple = 7.0f;
float multiple = static_cast<float>(x_h * x_w) / (out_h * out_w);
CHECK_LT(multiple, largest_multiple)
<< "[NPU] multiple=(ih*iw)/(oh*ow)=" << multiple
<< " is too large, should not exceed " << largest_multiple
<< " in HiAI DDK";
auto w_const_node =
std::make_shared<ge::op::Const>(unique_op_type + "/w");
w_const_node->set_attr_value(
lite::npu::CreateTensorAndFillData(std::vector<int>({out_h, out_w})));
interp_node->set_input_w(*w_const_node);
lite::npu::OpList::Global().add(w_const_node);
}
interp_node->set_attr_output_dim_mode(
2); // 0: zoom_factor, 1: shrink_factor, 2: height/width
interp_node->set_attr_align_corners(align_corners);
outputs_map[op_info->Output("Out").front()] = interp_node;
auto out_size_const_node =
std::make_shared<ge::op::Const>(unique_op_type + "/out_size");
out_size_const_node->set_attr_value(
lite::npu::CreateTensorAndFillData(std::vector<int>({out_h, out_w})));
out_size_node = out_size_const_node;
}
lite::npu::OpList::Global().add(out_size_node);
std::shared_ptr<ge::Operator> interp_node = nullptr;
if (interp_method == "bilinear") {
auto bilinear_interp_node =
std::make_shared<ge::op::ResizeBilinear>(unique_op_type);
bilinear_interp_node->set_input_x(*inputs_map.at(x_var_name));
bilinear_interp_node->set_input_size(*out_size_node);
bilinear_interp_node->set_attr_align_corners(align_corners);
interp_node = bilinear_interp_node;
} else if (interp_method == "nearest") {
auto interp_node =
auto nearest_interp_node =
std::make_shared<ge::op::ResizeNearestNeighbor>(unique_op_type);
lite::npu::OpList::Global().add(interp_node);
interp_node->set_input_image(*inputs_map.at(x_var_name));
if (inputs_map_has_w) {
auto out_size_var_name = op_info->Input("OutSize").front();
interp_node->set_input_size(*inputs_map.at(out_size_var_name));
lite::npu::OpList::Global().add(inputs_map.at(out_size_var_name));
} else {
auto w_const_node =
std::make_shared<ge::op::Const>(unique_op_type + "/w");
w_const_node->set_attr_value(
lite::npu::CreateTensorAndFillData(std::vector<int>({out_h, out_w})));
interp_node->set_input_size(*w_const_node);
lite::npu::OpList::Global().add(w_const_node);
}
interp_node->set_attr_align_corners(align_corners);
outputs_map[op_info->Output("Out").front()] = interp_node;
nearest_interp_node->set_input_image(*inputs_map.at(x_var_name));
nearest_interp_node->set_input_size(*out_size_node);
nearest_interp_node->set_attr_align_corners(align_corners);
interp_node = nearest_interp_node;
} else {
LOG(FATAL) << "[NPU] Unsupported interpolate method: " << interp_method;
}
lite::npu::OpList::Global().add(interp_node);
node_map_type outputs_map;
outputs_map[op_info->Output("Out").front()] = interp_node;
return outputs_map;
}
......
......@@ -31,82 +31,67 @@ node_map_type MulConverter(const std::shared_ptr<lite::OpLite> mul_op,
auto unique_op_type = lite::npu::UniqueName(op_type);
LOG(INFO) << "[NPU] Converting " + op_type + "...";
auto output_node = std::make_shared<ge::op::MatMul>(unique_op_type);
auto x_var_name = op_info->Input("X").front();
auto y_var_name = op_info->Input("Y").front();
auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
auto y = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
auto x_dims = x->dims();
auto y_dims = y->dims();
int x_num_col_dims = op_info->GetAttr<int>("x_num_col_dims");
int y_num_col_dims = op_info->GetAttr<int>("y_num_col_dims");
auto* xtensor = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
auto* ytensor = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
int m = xtensor->dims().Slice(0, x_num_col_dims).production();
int x_w = xtensor->dims()
.Slice(x_num_col_dims, xtensor->dims().size())
.production();
int y_h = ytensor->dims().Slice(0, y_num_col_dims).production();
int n = ytensor->dims()
.Slice(y_num_col_dims, ytensor->dims().size())
.production();
CHECK_EQ(x_w, y_h) << "[NPU] x_w must be equal with y_h";
int k = x_w;
int m = x_dims.Slice(0, x_num_col_dims).production();
int k = x_dims.Slice(x_num_col_dims, x_dims.size()).production();
CHECK_EQ(k, y_dims.Slice(0, y_num_col_dims).production())
<< "[NPU] columns of X must be equal with rows of Y";
int n = y_dims.Slice(y_num_col_dims, y_dims.size()).production();
LOG(INFO) << "m:" << m << ",n:" << n << ",k:" << k;
LOG(INFO) << "x_var_name:" << x_var_name
<< ", is data: " << inputs_map.count(x_var_name);
LOG(INFO) << "y_var_name:" << y_var_name
<< ", is data: " << inputs_map.count(y_var_name);
CHECK(inputs_map.count(x_var_name))
<< "[NPU] MatMul only support X is data, Y is const yet";
<< "[NPU] MatMul in HiAI DDK only support X is data, Y is const yet.";
auto mul_node = std::make_shared<ge::op::MatMul>(unique_op_type);
// add input x node which supports persistable and non-persistable tensor, and
// reshape to (m, k)
if (inputs_map.count(x_var_name)) {
auto xsrc = inputs_map.at(x_var_name);
auto reshapex = std::make_shared<ge::op::Reshape>(x_var_name + "_reshape");
reshapex->set_input_tensor(*xsrc);
reshapex->set_attr_shape({m, k});
reshapex->set_attr_axis(0);
lite::npu::OpList::Global().add(xsrc);
lite::npu::OpList::Global().add(reshapex);
output_node->set_input_x(*reshapex);
auto reshaped_x_node =
std::make_shared<ge::op::Reshape>(x_var_name + "_reshape");
reshaped_x_node->set_input_tensor(*inputs_map.at(x_var_name));
reshaped_x_node->set_attr_shape({m, k});
reshaped_x_node->set_attr_axis(0);
mul_node->set_input_x1(*reshaped_x_node);
lite::npu::OpList::Global().add(inputs_map.at(x_var_name));
lite::npu::OpList::Global().add(reshaped_x_node);
} else {
auto constx = std::make_shared<ge::op::Const>(x_var_name);
ge::TensorDesc desc(ge::Shape({m, k}), ge::FORMAT_NCHW, ge::DT_FLOAT);
auto size = desc.GetShape().GetShapeSize();
CHECK_EQ(size, xtensor->dims().production());
ge::TensorPtr ptensor = std::make_shared<ge::Tensor>();
ptensor->SetTensorDesc(desc);
auto* pdata = reinterpret_cast<uint8_t*>(xtensor->mutable_data<float>());
ptensor->SetData(pdata, size * sizeof(float));
constx->set_attr_value(ptensor);
lite::npu::OpList::Global().add(constx);
output_node->set_input_x(*constx);
auto x_const_node = std::make_shared<ge::op::Const>(x_var_name);
x_const_node->set_attr_value(lite::npu::CvtTensor(x, {m, k}));
mul_node->set_input_x1(*x_const_node);
lite::npu::OpList::Global().add(x_const_node);
}
// add input y node which only supports persistable tensor, and reshape to (k,
// n)
if (inputs_map.count(y_var_name)) {
auto ysrc = inputs_map.at(y_var_name);
auto reshapey = std::make_shared<ge::op::Reshape>(y_var_name + "_reshape");
reshapey->set_input_tensor(*ysrc);
reshapey->set_attr_shape({k, n});
reshapey->set_attr_axis(0);
lite::npu::OpList::Global().add(ysrc);
lite::npu::OpList::Global().add(reshapey);
output_node->set_input_w(*reshapey);
auto reshaped_y_node =
std::make_shared<ge::op::Reshape>(y_var_name + "_reshape");
reshaped_y_node->set_input_tensor(*inputs_map.at(y_var_name));
reshaped_y_node->set_attr_shape({k, n});
reshaped_y_node->set_attr_axis(0);
mul_node->set_input_x2(*reshaped_y_node);
lite::npu::OpList::Global().add(inputs_map.at(y_var_name));
lite::npu::OpList::Global().add(reshaped_y_node);
} else {
auto consty = std::make_shared<ge::op::Const>(y_var_name);
ge::TensorDesc desc(ge::Shape({k, n}), ge::FORMAT_NCHW, ge::DT_FLOAT);
auto size = desc.GetShape().GetShapeSize();
CHECK_EQ(size, ytensor->dims().production());
ge::TensorPtr ptensor = std::make_shared<ge::Tensor>();
ptensor->SetTensorDesc(desc);
auto* pdata = reinterpret_cast<uint8_t*>(ytensor->mutable_data<float>());
ptensor->SetData(pdata, size * sizeof(float));
consty->set_attr_value(ptensor);
lite::npu::OpList::Global().add(consty);
output_node->set_input_w(*consty);
auto y_const_node = std::make_shared<ge::op::Const>(y_var_name);
y_const_node->set_attr_value(lite::npu::CvtTensor(y, {k, n}));
mul_node->set_input_x2(*y_const_node);
lite::npu::OpList::Global().add(y_const_node);
}
lite::npu::OpList::Global().add(output_node);
lite::npu::OpList::Global().add(mul_node);
node_map_type outputs_map;
outputs_map[op_info->Output("Out").front()] = output_node;
outputs_map[op_info->Output("Out").front()] = mul_node;
return outputs_map;
}
......
......@@ -5,8 +5,8 @@ set -ex
ARM_OS="android" # android only yet
ARM_ABI="armv8" # armv8, armv7
ARM_LANG="gcc" # gcc only yet
ANDROID_STL="c++_static" # c++_shared, c++_static
DDK_ROOT="$(pwd)/ai_ddk_lib/" # HIAI SDK from https://developer.huawei.com/consumer/cn/hiai/
ANDROID_STL="c++_shared" # c++_shared/c++_static, c++_shared is used by HiAI DDK 310
DDK_ROOT="$(pwd)/ai_ddk_lib/" # HiAI DDK 310 from https://developer.huawei.com/consumer/cn/hiai/
TARGET_NAME="test_npu_pass" # default target
BUILD_EXTRA=OFF # ON(with sequence ops)/OFF
WITH_JAVA=ON # ON(build jar and jni so)/OFF
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册