Unverified commit fc913904, authored by zhupengyang, committed by GitHub

[NPU] fix elementwise_add op bridge and unit test (#2503)

add elementwise_sub, mul, div op bridges

test=develop
Parent e8ea4a56
@@ -30,34 +30,50 @@ node_map_type ElementwiseConverter(
   auto unique_op_type = lite::npu::UniqueName(op_type);
   LOG(INFO) << "[NPU] Converting " + op_type + "...";
 
-  std::shared_ptr<ge::op::Eltwise> elementwise_node =
-      std::make_shared<ge::op::Eltwise>(unique_op_type);
   auto x_var_name = op_info->Input("X").front();
   auto y_var_name = op_info->Input("Y").front();
+  CHECK_EQ(op_info->GetAttr<int>("axis"), -1)
+      << "[NPU] elementwise only support inputs with same size";
   CHECK(inputs_map.find(x_var_name) != inputs_map.end());
-  elementwise_node->set_input_x1(*inputs_map.at(x_var_name));
-  lite::npu::OpList::Global().add(inputs_map.at(x_var_name));
+  std::shared_ptr<ge::Operator> elementwise_node = nullptr;
+  std::shared_ptr<ge::Operator> x_node = inputs_map.at(x_var_name);
+  std::shared_ptr<ge::Operator> y_node = nullptr;
   if (inputs_map.find(y_var_name) != inputs_map.end()) {
-    elementwise_node->set_input_x2(*inputs_map.at(y_var_name));
-    lite::npu::OpList::Global().add(inputs_map.at(y_var_name));
+    y_node = inputs_map.at(y_var_name);
   } else {
     auto y_const_node = std::make_shared<ge::op::Const>(y_var_name);
-    auto* y = scope->FindVar(y_var_name)->GetMutable<Tensor>();
+    auto* y = scope->FindMutableTensor(y_var_name);
     y_const_node->set_attr_value(lite::npu::CvtTensor(y));
-    elementwise_node->set_input_x2(*y_const_node);
-    lite::npu::OpList::Global().add(y_const_node);
+    y_node = y_const_node;
   }
+  lite::npu::OpList::Global().add(x_node);
+  lite::npu::OpList::Global().add(y_node);
 
-  lite::npu::OpList::Global().add(elementwise_node);
-  // paddlelite has sum only
-  elementwise_node->set_attr_mode(1);
+  if (op_type == "elementwise_add" ||
+      op_type == "fusion_elementwise_add_activation") {
+    auto elt_node = std::make_shared<ge::op::Add>(unique_op_type);
+    elt_node->set_input_x1(*x_node);
+    elt_node->set_input_x2(*y_node);
+    elementwise_node = elt_node;
+  } else if (op_type == "elementwise_sub") {
+    auto elt_node = std::make_shared<ge::op::Sub>(unique_op_type);
+    elt_node->set_input_x1(*x_node);
+    elt_node->set_input_x2(*y_node);
+    elementwise_node = elt_node;
+  } else if (op_type == "elementwise_mul") {
+    auto elt_node = std::make_shared<ge::op::Mul>(unique_op_type);
+    elt_node->set_input_x(*x_node);
+    elt_node->set_input_y(*y_node);
+    elementwise_node = elt_node;
+  } else if (op_type == "elementwise_div") {
+    auto elt_node = std::make_shared<ge::op::RealDiv>(unique_op_type);
+    elt_node->set_input_x1(*x_node);
+    elt_node->set_input_x2(*y_node);
+    elementwise_node = elt_node;
+  } else {
+    LOG(FATAL) << "unsupported op type: " << op_type;
+  }
+  lite::npu::OpList::Global().add(elementwise_node);
 
   node_map_type outputs_map;
   if (op_type == "fusion_elementwise_add_activation") {
@@ -86,3 +102,9 @@ REGISTER_NPU_BRIDGE(elementwise_add,
                     paddle::lite::kernels::npu::bridges::ElementwiseConverter);
 REGISTER_NPU_BRIDGE(fusion_elementwise_add_activation,
                     paddle::lite::kernels::npu::bridges::ElementwiseConverter);
+REGISTER_NPU_BRIDGE(elementwise_sub,
+                    paddle::lite::kernels::npu::bridges::ElementwiseConverter);
+REGISTER_NPU_BRIDGE(elementwise_mul,
+                    paddle::lite::kernels::npu::bridges::ElementwiseConverter);
+REGISTER_NPU_BRIDGE(elementwise_div,
+                    paddle::lite::kernels::npu::bridges::ElementwiseConverter);
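The remaining hunks are from the matching unit test. A reminder for consumers: registering a bridge is not enough when linking statically; the test keeps the new ops and bridges alive with USE_ declaration pairs, the same ones this patch appends at the bottom of the diff. Any other target exercising these bridges would presumably need the equivalent block:

// Declaration pairs mirroring what the patch adds to the test file; a target
// that links PaddleLite statically needs these so the registrations survive.
USE_LITE_OP(elementwise_sub);
USE_NPU_BRIDGE(elementwise_sub);
USE_LITE_OP(elementwise_mul);
USE_NPU_BRIDGE(elementwise_mul);
USE_LITE_OP(elementwise_div);
USE_NPU_BRIDGE(elementwise_div);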
@@ -29,37 +29,28 @@ template <typename dtype>
 void elementwise_add_ref(const std::shared_ptr<operators::ElementwiseOp> op) {
   Scope* scope = op->scope();
   const OpInfo* op_info = op->op_info();
-  auto x = scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
-  auto y = scope->FindVar(op_info->Input("Y").front())->GetMutable<Tensor>();
-  auto out =
-      scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
+  auto x = scope->FindTensor("x");
+  auto y = scope->FindTensor("y");
+  auto out = scope->FindMutableTensor("out_ref");
+  out->Resize(x->dims());
   auto x_data = x->data<dtype>();
   auto y_data = y->data<dtype>();
-  dtype* out_data = out->mutable_data<dtype>();
+  auto out_data = out->mutable_data<dtype>();
   auto x_dims = x->dims();
   auto y_dims = y->dims();
   int axis = op_info->GetAttr<int>("axis");
   if (axis < 0) {
-    axis = x_dims.size() - y_dims.size();
-  }
-  int batch = 1;
-  int channels = 1;
-  int num = 1;
-  for (int i = 0; i < axis; ++i) {
-    batch *= x_dims[i];
-  }
-  for (int i = 0; i < y_dims.size(); ++i) {
-    channels *= y_dims[i];
-  }
-  for (int i = y_dims.size() + axis; i < x_dims.size(); ++i) {
-    num *= x_dims[i];
+    axis += x_dims.size();
   }
+  int batch = x_dims[0] / y_dims[0];
+  int channels = y->numel();
+  int num = x->numel() / channels / batch;
 
   // do elementwise add/sub/max...
-  std::string elt_type = "add";
-  if (elt_type == "add") {
+  std::string op_type = op_info->Type();
+  if (op_type == "elementwise_add") {
     for (int i = 0; i < batch; ++i) {
       for (int j = 0; j < channels; ++j) {
         int offset = (i * channels + j) * num;
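The rewritten reference implementation deserves a worked example: a negative axis is normalized with axis += x_dims.size(), and Y is then applied over X as a [batch, channels, num] decomposition with channels = numel(Y); note the decomposition itself only consumes the shapes, not the normalized axis. A self-contained sketch using the shapes from the first updated test case (plain C++, computed values shown in comments):

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // First updated test case below: X {1, 2, 3, 4}, Y {1, 2, 1, 1}.
  std::vector<int64_t> x_dims = {1, 2, 3, 4};
  std::vector<int64_t> y_dims = {1, 2, 1, 1};
  int axis = -1;
  if (axis < 0) axis += static_cast<int>(x_dims.size());  // -1 -> 3

  int64_t x_numel = 1, y_numel = 1;
  for (int64_t d : x_dims) x_numel *= d;
  for (int64_t d : y_dims) y_numel *= d;

  // Same decomposition as elementwise_add_ref: each of the "channels" values
  // of Y is replicated across "num" inner elements in each "batch" block.
  int64_t batch = x_dims[0] / y_dims[0];     // 1 / 1 = 1
  int64_t channels = y_numel;                // 2
  int64_t num = x_numel / channels / batch;  // 24 / 2 / 1 = 12
  std::cout << batch << " " << channels << " " << num << std::endl;  // 1 2 12
  return 0;
}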
@@ -73,7 +64,7 @@ void elementwise_add_ref(const std::shared_ptr<operators::ElementwiseOp> op) {
         }
       }
     }
-  } else if (elt_type == "sub") {
+  } else if (op_type == "elementwise_sub") {
     for (int i = 0; i < batch; ++i) {
       for (int j = 0; j < channels; ++j) {
         int offset = (i * channels + j) * num;
@@ -87,7 +78,7 @@ void elementwise_add_ref(const std::shared_ptr<operators::ElementwiseOp> op) {
         }
       }
     }
-  } else if (elt_type == "mul") {
+  } else if (op_type == "elementwise_mul") {
     for (int i = 0; i < batch; ++i) {
       for (int j = 0; j < channels; ++j) {
         int offset = (i * channels + j) * num;
@@ -101,7 +92,21 @@ void elementwise_add_ref(const std::shared_ptr<operators::ElementwiseOp> op) {
         }
       }
     }
-  } else if (elt_type == "max") {
+  } else if (op_type == "elementwise_div") {
+    for (int i = 0; i < batch; ++i) {
+      for (int j = 0; j < channels; ++j) {
+        int offset = (i * channels + j) * num;
+        const dtype* din_ptr = x_data + offset;
+        const dtype diny_data = y_data[j];
+        dtype* dout_ptr = out_data + offset;
+        for (int k = 0; k < num; ++k) {
+          *dout_ptr = *din_ptr / diny_data;
+          dout_ptr++;
+          din_ptr++;
+        }
+      }
+    }
+  } else if (op_type == "elementwise_max") {
     for (int i = 0; i < batch; ++i) {
       for (int j = 0; j < channels; ++j) {
         int offset = (i * channels + j) * num;
@@ -116,11 +121,14 @@ void elementwise_add_ref(const std::shared_ptr<operators::ElementwiseOp> op) {
       }
     }
   } else {
-    LOG(FATAL) << "unsupported Elementwise type: " << elt_type;
+    LOG(FATAL) << "unsupported Elementwise type: " << op_type;
   }
 }
 
-void test_elementwise_add(int bs, int ic, int ih, int iw, int axis) {
+void test_elementwise_add(const std::vector<int64_t>& x_shape,
+                          const std::vector<int64_t>& y_shape,
+                          int axis,
+                          std::string elt_type) {
   // prepare input&output variables
   Scope scope;
   std::string x_var_name = "x";
@@ -131,16 +139,16 @@ void test_elementwise_add(int bs, int ic, int ih, int iw, int axis) {
   auto* y = scope.Var(y_var_name)->GetMutable<Tensor>();
   auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
   auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
-  x->Resize({bs, ic, ih, iw});
-  y->Resize({bs, ic, ih, iw});
+  x->Resize(x_shape);
+  y->Resize(y_shape);
 
   // initialize input&output data
-  FillTensor<float>(x);
-  FillTensor<float>(y);
+  FillTensor<float>(x, 1, 5);
+  FillTensor<float>(y, 1, 5);
 
   // initialize op desc
   cpp::OpDesc opdesc;
-  opdesc.SetType("elementwise_add");
+  opdesc.SetType("elementwise_" + elt_type);
   opdesc.SetInput("X", {x_var_name});
   opdesc.SetInput("Y", {y_var_name});
   opdesc.SetOutput("Out", {out_var_name});
@@ -149,7 +157,6 @@ void test_elementwise_add(int bs, int ic, int ih, int iw, int axis) {
   // create and convert op to NPU model, then run it on NPU
   auto op = CreateOp<operators::ElementwiseOp>(opdesc, &scope);
   LauchOp(op, {x_var_name}, {out_var_name});
-  out_ref->CopyDataFrom(*out);
 
   // execute reference implementation and save to output tensor
   elementwise_add_ref<float>(op);
@@ -158,19 +165,14 @@ void test_elementwise_add(int bs, int ic, int ih, int iw, int axis) {
   auto* out_data = out->mutable_data<float>();
   auto* out_ref_data = out_ref->mutable_data<float>();
   for (int i = 0; i < out->dims().production(); i++) {
-    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-1);
+    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-2);
   }
 }
 
 TEST(NPUBridges, elementwise_add) {
-  for (auto bs : {1, 4, 7}) {
-    for (auto ic : {1, 4, 7}) {
-      for (auto ih : {1, 4, 7}) {
-        for (auto iw : {1, 4, 7}) {
-          for (auto axis : {-1}) test_elementwise_add(bs, ic, ih, iw, axis);
-        }
-      }
-    }
+  for (auto elt_type : {"add", "sub", "mul", "div"}) {
+    test_elementwise_add({1, 2, 3, 4}, {1, 2, 1, 1}, 1, elt_type);
+    test_elementwise_add({1, 2, 3, 4}, {1, 2, 3, 4}, 3, elt_type);
   }
 }
@@ -182,3 +184,9 @@ TEST(NPUBridges, elementwise_add) {
 USE_LITE_OP(elementwise_add);
 USE_NPU_BRIDGE(elementwise_add);
+USE_LITE_OP(elementwise_sub);
+USE_NPU_BRIDGE(elementwise_sub);
+USE_LITE_OP(elementwise_mul);
+USE_NPU_BRIDGE(elementwise_mul);
+USE_LITE_OP(elementwise_div);
+USE_NPU_BRIDGE(elementwise_div);