diff --git a/cmake/cross_compiling/npu.cmake b/cmake/cross_compiling/npu.cmake
index 25aa4d2bc8c1c145e7a103c9164e1c9e231a8f9e..c22bb1db4fbf8a7370ff3e7c9aca40cc94d550a2 100644
--- a/cmake/cross_compiling/npu.cmake
+++ b/cmake/cross_compiling/npu.cmake
@@ -30,7 +30,7 @@ if(NOT NPU_DDK_INC)
   message(FATAL_ERROR "Can not find HiAiModelManagerService.h in ${NPU_DDK_ROOT}/include")
 endif()
 
-include_directories("${NPU_DDK_ROOT}")
+include_directories("${NPU_DDK_ROOT}/include")
 
 set(NPU_SUB_LIB_PATH "lib64")
 if(ARM_TARGET_ARCH_ABI STREQUAL "armv8")
diff --git a/lite/backends/npu/device.h b/lite/backends/npu/device.h
index 3eba0b77e4bdeb26cdff869771645a5ce7637ae4..411600ae0a38e4ee1b4a3ce3d6519b927eeb0a1a 100644
--- a/lite/backends/npu/device.h
+++ b/lite/backends/npu/device.h
@@ -18,8 +18,8 @@
 #include <memory>
 #include <string>
 #include <vector>
-#include "ai_ddk_lib/include/HiAiModelManagerService.h"
-#include "ai_ddk_lib/include/hiai_ir_build.h"
+#include "HiAiModelManagerService.h"  // NOLINT
+#include "hiai_ir_build.h"  // NOLINT
 
 namespace paddle {
 namespace lite {
diff --git a/lite/core/mir/subgraph/subgraph_pass.cc b/lite/core/mir/subgraph/subgraph_pass.cc
index b974ac7043e2fc1c656c4bad69e7ca50fffaff8c..1f831397916f012b9338b5783cb5c236aeac9972 100644
--- a/lite/core/mir/subgraph/subgraph_pass.cc
+++ b/lite/core/mir/subgraph/subgraph_pass.cc
@@ -27,7 +27,7 @@ namespace mir {
 
 void NPUSubgraphPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
   std::unordered_set<std::string> supported_lists;
-#define USE_SUBGRAPH_BRIDGE(dev_type, op_type) supported_lists.insert(#op_type);
+#define USE_SUBGRAPH_BRIDGE(op_type, target) supported_lists.insert(#op_type);
 #include "lite/kernels/npu/bridges/paddle_use_bridges.h"
 #undef USE_SUBGRAPH_BRIDGE
   auto teller = [&](Node* node) {
@@ -41,7 +41,7 @@ void NPUSubgraphPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
 
 void XPUSubgraphPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
   std::unordered_set<std::string> supported_lists;
-#define USE_SUBGRAPH_BRIDGE(dev_type, op_type) supported_lists.insert(#op_type);
+#define USE_SUBGRAPH_BRIDGE(op_type, target) supported_lists.insert(#op_type);
 #include "lite/kernels/xpu/bridges/paddle_use_bridges.h"
 #undef USE_SUBGRAPH_BRIDGE
   auto teller = [&](Node* node) {
diff --git a/lite/kernels/npu/bridges/act_op.cc b/lite/kernels/npu/bridges/act_op.cc
index 62eb649e0e5ec07c402347af98730cabf76c2540..a4d1009f1be286e8bd8dfcdd469ff53b6681c820 100644
--- a/lite/kernels/npu/bridges/act_op.cc
+++ b/lite/kernels/npu/bridges/act_op.cc
@@ -43,33 +43,34 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK(out_type->layout() == DATALAYOUT(kNCHW));
 
   // X node
-  std::shared_ptr<ge::Operator> x_node = nullptr;
-  if (graph->HasNode(x_name)) {
-    x_node = graph->GetNode(x_name);
+  std::shared_ptr<Node> x_node = nullptr;
+  if (graph->Has(x_name)) {
+    x_node = graph->Get(x_name);
   } else {
-    x_node = graph->AddNode(x_name, x_dims);
+    x_node = graph->Add(x_name, *x);
   }
 
   // Act node
-  auto act_node = graph->AddNode<ge::op::Activation>(out_name);
-  act_node->set_input_x(*x_node);
+  auto act_node = graph->Add<ge::op::Activation>(out_name);
+  auto act_op = act_node->data<ge::op::Activation>();
+  act_op->set_input_x(*x_node->data());
   // TODO(hong19860320) set the coef value for act Ops, such as leaky_relu,
   // clipped_relu etc.
-  act_node->set_attr_mode(CvtActMode(op_type));
+  act_op->set_attr_mode(CvtActMode(op_type));
   if (op_type == "relu_clipped") {
     auto Relu_clipped_coef = op_info->GetAttr<float>("Relu_clipped_coef");
-    act_node->set_attr_coef(Relu_clipped_coef);
+    act_op->set_attr_coef(Relu_clipped_coef);
   } else if (op_type == "relu6") {
     float Relu_clipped_coef = 6.f;
-    act_node->set_attr_coef(Relu_clipped_coef);
+    act_op->set_attr_coef(Relu_clipped_coef);
   } else if (op_type == "leaky_relu") {
     auto alpha = op_info->GetAttr<float>("alpha");
-    act_node->set_attr_negative_slope(alpha);
+    act_op->set_attr_negative_slope(alpha);
   } else if (op_type == "hard_sigmoid") {
     auto slope = op_info->GetAttr<float>("slope");
     auto offset = op_info->GetAttr<float>("offset");
-    act_node->set_attr_negative_slope(slope);
-    act_node->set_attr_coef(offset);
+    act_op->set_attr_negative_slope(slope);
+    act_op->set_attr_coef(offset);
   }
   return SUCCESS;
 }
@@ -79,25 +80,27 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 }  // namespace lite
 }  // namespace paddle
 
-REGISTER_SUBGRAPH_BRIDGE(NPU,
-                         sigmoid,
+REGISTER_SUBGRAPH_BRIDGE(sigmoid,
+                         kNPU,
                          paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(NPU, relu, paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(NPU, tanh, paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(NPU,
-                         relu_clipped,
+REGISTER_SUBGRAPH_BRIDGE(relu, kNPU, paddle::lite::subgraph::npu::ActConverter);
+REGISTER_SUBGRAPH_BRIDGE(tanh, kNPU, paddle::lite::subgraph::npu::ActConverter);
+REGISTER_SUBGRAPH_BRIDGE(relu_clipped,
+                         kNPU,
                          paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(NPU, relu6, paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(NPU,
-                         leaky_relu,
+REGISTER_SUBGRAPH_BRIDGE(relu6,
+                         kNPU,
                          paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(NPU, abs, paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(NPU,
-                         softsign,
+REGISTER_SUBGRAPH_BRIDGE(leaky_relu,
+                         kNPU,
                          paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(NPU,
-                         softplus,
+REGISTER_SUBGRAPH_BRIDGE(abs, kNPU, paddle::lite::subgraph::npu::ActConverter);
+REGISTER_SUBGRAPH_BRIDGE(softsign,
+                         kNPU,
                          paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(NPU,
-                         hard_sigmoid,
+REGISTER_SUBGRAPH_BRIDGE(softplus,
+                         kNPU,
+                         paddle::lite::subgraph::npu::ActConverter);
+REGISTER_SUBGRAPH_BRIDGE(hard_sigmoid,
+                         kNPU,
                          paddle::lite::subgraph::npu::ActConverter);
diff --git a/lite/kernels/npu/bridges/argmax_op.cc b/lite/kernels/npu/bridges/argmax_op.cc
index 835d4dd1ed384b2ec8e0436317326b0d07d4e49d..3d397aab9d5cc7cfb800198184d656856d8c101f 100644
--- a/lite/kernels/npu/bridges/argmax_op.cc
+++ b/lite/kernels/npu/bridges/argmax_op.cc
@@ -44,20 +44,21 @@ int ArgmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   int axis = op_info->GetAttr<int>("axis");
 
   // X node
-  std::shared_ptr<ge::Operator> x_node = nullptr;
-  if (graph->HasNode(x_name)) {
-    x_node = graph->GetNode(x_name);
+  std::shared_ptr<Node> x_node = nullptr;
+  if (graph->Has(x_name)) {
+    x_node = graph->Get(x_name);
   } else {
-    x_node = graph->AddNode(x_name, x_dims);
+    x_node = graph->Add(x_name, *x);
   }
 
   // Axis node
-  auto axis_const_node = graph->AddNode(out_name + "/axis", axis);
+  auto axis_node = graph->Add(out_name + "/axis", axis);
 
   // Argmax node
-  auto argmax_node = graph->AddNode<ge::op::ArgMax>(out_name);
-  argmax_node->set_input_x1(*x_node);
-  argmax_node->set_input_x2(*axis_const_node);
+  auto argmax_node = graph->Add<ge::op::ArgMax>(out_name);
+  auto argmax_op = argmax_node->data<ge::op::ArgMax>();
+  argmax_op->set_input_x1(*x_node->data());
+  argmax_op->set_input_x2(*axis_node->data());
 
   return SUCCESS;
 }
@@ -66,6 +67,6 @@ int ArgmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 }  // namespace lite
 }  // namespace paddle
 
-REGISTER_SUBGRAPH_BRIDGE(NPU,
-                         arg_max,
+REGISTER_SUBGRAPH_BRIDGE(arg_max,
+                         kNPU,
                          paddle::lite::subgraph::npu::ArgmaxConverter);
diff --git a/lite/kernels/npu/bridges/batch_norm_op.cc b/lite/kernels/npu/bridges/batch_norm_op.cc
index 57b52cf745ef189f4c6151940de73e9f944f72dc..d151fd8d7b35483f41190ecc789844a99e1f72de 100644
--- a/lite/kernels/npu/bridges/batch_norm_op.cc
+++ b/lite/kernels/npu/bridges/batch_norm_op.cc
@@ -67,30 +67,31 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   bool use_global_stats = op_info->GetAttr<bool>("use_global_stats");
 
   // X node
-  std::shared_ptr<ge::Operator> x_node = nullptr;
-  if (graph->HasNode(x_name)) {
-    x_node = graph->GetNode(x_name);
+  std::shared_ptr<Node> x_node = nullptr;
+  if (graph->Has(x_name)) {
+    x_node = graph->Get(x_name);
   } else {
-    x_node = graph->AddNode(x_name, x_dims);
+    x_node = graph->Add(x_name, *x);
   }
 
   // Scale, Bias, Mean, Variance node
-  auto scale_const_node = graph->AddNode(scale_name, *scale);
-  auto bias_const_node = graph->AddNode(bias_name, *bias);
-  auto mean_const_node = graph->AddNode(mean_name, *mean);
-  auto variance_const_node = graph->AddNode(variance_name, *variance);
+  auto scale_node = graph->Add(scale_name, *scale);
+  auto bias_node = graph->Add(bias_name, *bias);
+  auto mean_node = graph->Add(mean_name, *mean);
+  auto variance_node = graph->Add(variance_name, *variance);
 
   // Batch Norm node
-  auto batch_norm_node = graph->AddNode<ge::op::BatchNormExt2>(y_name);
-  batch_norm_node->set_input_x(*x_node);
-  batch_norm_node->set_input_scale(*scale_const_node);
-  batch_norm_node->set_input_offset(*bias_const_node);
-  batch_norm_node->set_input_mean(*mean_const_node);
-  batch_norm_node->set_input_variance(*variance_const_node);
-  batch_norm_node->set_attr_momentum(momentum);
-  batch_norm_node->set_attr_epsilon(epsilon);
-  batch_norm_node->set_attr_mode(mode);
-  batch_norm_node->set_attr_use_global_stats(use_global_stats);
+  auto batch_norm_node = graph->Add<ge::op::BatchNormExt2>(y_name);
+  auto batch_norm_op = batch_norm_node->data<ge::op::BatchNormExt2>();
+  batch_norm_op->set_input_x(*x_node->data());
+  batch_norm_op->set_input_scale(*scale_node->data());
+  batch_norm_op->set_input_offset(*bias_node->data());
+  batch_norm_op->set_input_mean(*mean_node->data());
+  batch_norm_op->set_input_variance(*variance_node->data());
+  batch_norm_op->set_attr_momentum(momentum);
+  batch_norm_op->set_attr_epsilon(epsilon);
+  batch_norm_op->set_attr_mode(mode);
+  batch_norm_op->set_attr_use_global_stats(use_global_stats);
 
   return SUCCESS;
 }
@@ -99,6 +100,6 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 }  // namespace lite
 }  // namespace paddle
 
-REGISTER_SUBGRAPH_BRIDGE(NPU,
-                         batch_norm,
+REGISTER_SUBGRAPH_BRIDGE(batch_norm,
+                         kNPU,
                          paddle::lite::subgraph::npu::BatchNormConverter);
diff --git a/lite/kernels/npu/bridges/concat_op.cc b/lite/kernels/npu/bridges/concat_op.cc
index 44a2734c89d9fc3982dd3b934fac2d314bf600f3..e40af8703dd1dda7303f0976fa03abec7cdf7aaa 100644
--- a/lite/kernels/npu/bridges/concat_op.cc
+++ b/lite/kernels/npu/bridges/concat_op.cc
@@ -44,21 +44,22 @@ int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 
   // Traverse all of input nodes which are added into the new created concat
   // node
-  auto concat_node = graph->AddNode<ge::op::Concat>(out_name);
-  concat_node->set_attr_axis(axis);
-  concat_node->set_attr_N(num);
-
concat_node->create_dynamic_input_x(num); + auto concat_node = graph->Add(out_name); + auto concat_op = concat_node->data(); + concat_op->set_attr_axis(axis); + concat_op->set_attr_N(num); + concat_op->create_dynamic_input_x(num); int idx = 1; for (auto& x_name : x_names) { auto x = scope->FindMutableTensor(x_name); auto x_dims = x->dims(); - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } - concat_node->set_dynamic_input_x(idx, *x_node); + concat_op->set_dynamic_input_x(idx, *x_node->data()); idx++; } return SUCCESS; @@ -69,6 +70,6 @@ int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, - concat, +REGISTER_SUBGRAPH_BRIDGE(concat, + kNPU, paddle::lite::subgraph::npu::ConcatConverter); diff --git a/lite/kernels/npu/bridges/conv_op.cc b/lite/kernels/npu/bridges/conv_op.cc index 6b34e76880f54a3cc221a6d1e2e539214b0f79f9..60877f768b2ea691d99635944d4f1dbef7365fb3 100644 --- a/lite/kernels/npu/bridges/conv_op.cc +++ b/lite/kernels/npu/bridges/conv_op.cc @@ -67,11 +67,11 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) { CHECK_EQ(dilations.size(), 2L); // Input node - std::shared_ptr input_node = nullptr; - if (graph->HasNode(input_name)) { - input_node = graph->GetNode(input_name); + std::shared_ptr input_node = nullptr; + if (graph->Has(input_name)) { + input_node = graph->Get(input_name); } else { - input_node = graph->AddNode(input_name, input_dims); + input_node = graph->Add(input_name, *input); } if (paddings.size() == 2L) { @@ -109,104 +109,102 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // Filter node - auto filter_const_node = graph->AddNode(filter_name, *filter); + auto filter_node = graph->Add(filter_name, *filter); // Add bias node if exists bias // Supports the bias nodes with the following dimensions // 0: {oc} // 1: {1, oc, oh, ow} // 2: {n, oc, oh, ow} - std::shared_ptr bias_node = nullptr; + std::shared_ptr bias_node = nullptr; bool is_channel_bias = false; if (HasInputArg(op_info, scope, "Bias")) { auto bias_name = op_info->Input("Bias").front(); - auto bias_type = kernel->GetInputDeclType("Bias"); - CHECK(bias_type->precision() == PRECISION(kFloat)); - CHECK(bias_type->layout() == DATALAYOUT(kNCHW)); - auto bias = scope->FindMutableTensor(bias_name); - auto bias_dims = bias->dims(); - auto bias_data_size = bias_dims.production(); - auto output_data_size = output_dims.production(); - std::vector bias_shape; - if (bias_data_size == oc) { - // 0: {oc} - bias_shape = {1, oc, 1, 1}; - is_channel_bias = true; - } else if (bias_data_size == output_data_size / bs) { - // 1: {1, oc, oh, ow} - bias_shape = {1, output_dims[1], output_dims[2], output_dims[3]}; - } else if (bias_data_size == output_data_size) { - // 2: {n, oc, oh, ow} - bias_shape = output_dims.Vectorize(); + if (graph->Has(bias_name)) { + bias_node = graph->Get(bias_name); } else { - LOG(WARNING) << "[NPU] Bias dimension " << bias_dims - << " isn't supported in conv2d Op when output dimension is " - << output_dims; - return FAILED; - } - if (graph->HasNode(bias_name)) { - // Bias node from input node - bias_node = graph->GetNode(bias_name); - } else { - // Bias node with const data - bias_node = graph->AddNode(bias_name, *bias, bias_shape); + auto bias_type = 
kernel->GetInputDeclType("Bias"); + CHECK(bias_type->precision() == PRECISION(kFloat)); + CHECK(bias_type->layout() == DATALAYOUT(kNCHW)); + auto bias = scope->FindMutableTensor(bias_name); + auto bias_dims = bias->dims(); + auto bias_data_size = bias_dims.production(); + auto output_data_size = output_dims.production(); + std::vector bias_shape; + if (bias_data_size == oc) { + // 0: {oc} + bias_shape = {1, oc, 1, 1}; + is_channel_bias = true; + } else if (bias_data_size == output_data_size / bs) { + // 1: {1, oc, oh, ow} + bias_shape = {1, output_dims[1], output_dims[2], output_dims[3]}; + } else if (bias_data_size == output_data_size) { + // 2: {n, oc, oh, ow} + bias_shape = output_dims.Vectorize(); + } else { + LOG(WARNING) + << "[NPU] Bias dimension " << bias_dims + << " isn't supported in conv2d Op when output dimension is " + << output_dims; + return FAILED; + } + bias_node = graph->Add(bias_name, *bias, bias_shape); } } // Conv node - std::shared_ptr conv_node = nullptr; + std::shared_ptr conv_node = nullptr; if (use_depthwise_conv && is_depthwise_mode) { - auto depthwise_conv_node = - graph->AddNode(output_name); - depthwise_conv_node->set_input_x(*input_node); - depthwise_conv_node->set_input_filter(*filter_const_node); - depthwise_conv_node->set_attr_mode(1); - depthwise_conv_node->set_attr_algo(0); - depthwise_conv_node->set_attr_format(0); // NCHW - depthwise_conv_node->set_attr_pad_mode(5); // VALID - depthwise_conv_node->set_attr_group(groups); - depthwise_conv_node->set_attr_pad(ge::AttrValue::LIST_INT( + conv_node = graph->Add(output_name); + auto conv_op = conv_node->data(); + conv_op->set_input_x(*input_node->data()); + conv_op->set_input_filter(*filter_node->data()); + conv_op->set_attr_mode(1); + conv_op->set_attr_algo(0); + conv_op->set_attr_format(0); // NCHW + conv_op->set_attr_pad_mode(5); // VALID + conv_op->set_attr_group(groups); + conv_op->set_attr_pad(ge::AttrValue::LIST_INT( {paddings[0], paddings[1], paddings[2], paddings[3]})); - depthwise_conv_node->set_attr_dilation( + conv_op->set_attr_dilation( ge::AttrValue::LIST_INT({dilations[0], dilations[1]})); - depthwise_conv_node->set_attr_stride( - ge::AttrValue::LIST_INT({strides[0], strides[1]})); - depthwise_conv_node->set_attr_kernel( + conv_op->set_attr_stride(ge::AttrValue::LIST_INT({strides[0], strides[1]})); + conv_op->set_attr_kernel( ge::AttrValue::LIST_INT({filter_dims[2], filter_dims[3]})); - conv_node = depthwise_conv_node; // ConvolutionDepthwise Op doesn't support bias, so append Add node to // support bias if (bias_node != nullptr) { - auto add_node = graph->AddNode(output_name); - add_node->set_input_x1(*depthwise_conv_node); - add_node->set_input_x2(*bias_node); + auto add_node = graph->Add(output_name); + auto add_op = add_node->data(); + add_op->set_input_x1(*conv_node->data()); + add_op->set_input_x2(*bias_node->data()); conv_node = add_node; } } else { - auto common_conv_node = graph->AddNode(output_name); - common_conv_node->set_input_x(*input_node); - common_conv_node->set_input_w(*filter_const_node); - common_conv_node->set_attr_mode(1); - common_conv_node->set_attr_pad_mode(0); // NOTSET - common_conv_node->set_attr_group(groups); - common_conv_node->set_attr_pad(ge::AttrValue::LIST_INT( + conv_node = graph->Add(output_name); + auto conv_op = conv_node->data(); + conv_op->set_input_x(*input_node->data()); + conv_op->set_input_w(*filter_node->data()); + conv_op->set_attr_mode(1); + conv_op->set_attr_pad_mode(0); // NOTSET + conv_op->set_attr_group(groups); + 
conv_op->set_attr_pad(ge::AttrValue::LIST_INT( {paddings[0], paddings[0], paddings[2], paddings[2]})); - common_conv_node->set_attr_dilation( + conv_op->set_attr_dilation( ge::AttrValue::LIST_INT({dilations[0], dilations[1]})); - common_conv_node->set_attr_stride( - ge::AttrValue::LIST_INT({strides[0], strides[1]})); - common_conv_node->set_attr_kernel( + conv_op->set_attr_stride(ge::AttrValue::LIST_INT({strides[0], strides[1]})); + conv_op->set_attr_kernel( ge::AttrValue::LIST_INT({filter_dims[2], filter_dims[3]})); - conv_node = common_conv_node; // Convolution Op only support bias with dimension {1, oc, 1, 1}, // so append Add node if dimension is {1, oc, oh, ow} or (n, oc, oh, ow) if (bias_node != nullptr) { if (is_channel_bias) { - common_conv_node->set_input_b(*bias_node); + conv_op->set_input_b(*bias_node->data()); } else { - auto add_node = graph->AddNode(output_name); - add_node->set_input_x1(*common_conv_node); - add_node->set_input_x2(*bias_node); + auto add_node = graph->Add(output_name); + auto add_op = add_node->data(); + add_op->set_input_x1(*conv_node->data()); + add_op->set_input_x2(*bias_node->data()); conv_node = add_node; } } @@ -215,9 +213,10 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) { if (fuse_relu) { // Append relu node if fuse_relu is true - auto relu_node = graph->AddNode(output_name); - relu_node->set_input_x(*conv_node); - relu_node->set_attr_mode(CvtActMode("relu")); + auto relu_node = graph->Add(output_name); + auto relu_op = relu_node->data(); + relu_op->set_input_x(*conv_node->data()); + relu_op->set_attr_mode(CvtActMode("relu")); } return REBUILD_WHEN_SHAPE_CHANGED; } @@ -227,9 +226,9 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, - conv2d, +REGISTER_SUBGRAPH_BRIDGE(conv2d, + kNPU, paddle::lite::subgraph::npu::ConvConverter); -REGISTER_SUBGRAPH_BRIDGE(NPU, - depthwise_conv2d, +REGISTER_SUBGRAPH_BRIDGE(depthwise_conv2d, + kNPU, paddle::lite::subgraph::npu::ConvConverter); diff --git a/lite/kernels/npu/bridges/conv_transpose_op.cc b/lite/kernels/npu/bridges/conv_transpose_op.cc index 5ac0723c7841ae08290a1d0dfeb1265a855f8fde..ab31a920ec045c8d12139b804abbcca94f3e009a 100644 --- a/lite/kernels/npu/bridges/conv_transpose_op.cc +++ b/lite/kernels/npu/bridges/conv_transpose_op.cc @@ -58,11 +58,11 @@ int ConvTransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) { CHECK_EQ(dilations.size(), 2L); // Input node - std::shared_ptr input_node = nullptr; - if (graph->HasNode(input_name)) { - input_node = graph->GetNode(input_name); + std::shared_ptr input_node = nullptr; + if (graph->Has(input_name)) { + input_node = graph->Get(input_name); } else { - input_node = graph->AddNode(input_name, input_dims); + input_node = graph->Add(input_name, *input); } // Create input sizes node to describe the dimensions of input tensor @@ -83,55 +83,59 @@ int ConvTransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) { (input_dims[i + 2] - 1) * strides[i] + kernel_ext - 2 * paddings[i]; input_sizes.push_back(output_size); } - auto input_sizes_const_node = - graph->AddNode(output_name + "/input_sizes", input_sizes); + auto input_sizes_node = graph->Add(output_name + "/input_sizes", input_sizes); // Filter node - auto filter_const_node = graph->AddNode(filter_name, *filter); + auto filter_node = graph->Add(filter_name, *filter); // Deconv node - auto conv_transpose_node = graph->AddNode(output_name); - 
conv_transpose_node->set_input_input_sizes(*input_sizes_const_node); - conv_transpose_node->set_input_filter(*filter_const_node); - conv_transpose_node->set_input_x(*input_node); + auto conv_transpose_node = graph->Add(output_name); + auto conv_transpose_op = conv_transpose_node->data(); + conv_transpose_op->set_input_input_sizes(*input_sizes_node->data()); + conv_transpose_op->set_input_filter(*filter_node->data()); + conv_transpose_op->set_input_x(*input_node->data()); // Set attributes - conv_transpose_node->set_attr_format(0); // NCHW - conv_transpose_node->set_attr_pad_mode(0); // NOTSET - conv_transpose_node->set_attr_group(groups); - conv_transpose_node->set_attr_pad(ge::AttrValue::LIST_INT( + conv_transpose_op->set_attr_format(0); // NCHW + conv_transpose_op->set_attr_pad_mode(0); // NOTSET + conv_transpose_op->set_attr_group(groups); + conv_transpose_op->set_attr_pad(ge::AttrValue::LIST_INT( {paddings[0], paddings[1], paddings[2], paddings[3]})); - conv_transpose_node->set_attr_dilation( + conv_transpose_op->set_attr_dilation( ge::AttrValue::LIST_INT({dilations[0], dilations[1]})); - conv_transpose_node->set_attr_stride( + conv_transpose_op->set_attr_stride( ge::AttrValue::LIST_INT({strides[0], strides[1]})); - conv_transpose_node->set_attr_kernel( + conv_transpose_op->set_attr_kernel( ge::AttrValue::LIST_INT({filter_dims[2], filter_dims[3]})); // Append add node to add bias if exists bias - std::shared_ptr output_node = conv_transpose_node; if (HasInputArg(op_info, scope, "Bias")) { - // Create bias node + std::shared_ptr bias_node = nullptr; auto bias_name = op_info->Input("Bias").front(); - auto bias_type = kernel->GetInputDeclType("Bias"); - CHECK(bias_type->precision() == PRECISION(kFloat)); - CHECK(bias_type->layout() == DATALAYOUT(kNCHW)); - auto bias = scope->FindMutableTensor(bias_name); - auto channel_size = bias->dims().production(); - CHECK_EQ(channel_size, filter_dims[1] * groups); - auto bias_const_node = - graph->AddNode(bias_name, *bias, {1, channel_size, 1, 1}); + if (graph->Has(bias_name)) { + bias_node = graph->Get(bias_name); + } else { + auto bias_type = kernel->GetInputDeclType("Bias"); + CHECK(bias_type->precision() == PRECISION(kFloat)); + CHECK(bias_type->layout() == DATALAYOUT(kNCHW)); + auto bias = scope->FindMutableTensor(bias_name); + auto channel_size = bias->dims().production(); + CHECK_EQ(channel_size, filter_dims[1] * groups); + bias_node = graph->Add(bias_name, *bias, {1, channel_size, 1, 1}); + } // Append add node to add bias node - auto add_node = graph->AddNode(output_name); - add_node->set_input_x1(*conv_transpose_node); - add_node->set_input_x2(*bias_const_node); - output_node = add_node; + auto add_node = graph->Add(output_name); + auto add_op = add_node->data(); + add_op->set_input_x1(*conv_transpose_node->data()); + add_op->set_input_x2(*bias_node->data()); + conv_transpose_node = add_node; } if (fuse_relu) { // Append relu node if fuse_relu is true - auto relu_node = graph->AddNode(output_name); - relu_node->set_input_x(*output_node); - relu_node->set_attr_mode(CvtActMode("relu")); + auto relu_node = graph->Add(output_name); + auto relu_op = relu_node->data(); + relu_op->set_input_x(*conv_transpose_node->data()); + relu_op->set_attr_mode(CvtActMode("relu")); } return REBUILD_WHEN_SHAPE_CHANGED; } @@ -141,6 +145,6 @@ int ConvTransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, - conv2d_transpose, +REGISTER_SUBGRAPH_BRIDGE(conv2d_transpose, + kNPU, 
paddle::lite::subgraph::npu::ConvTransposeConverter); diff --git a/lite/kernels/npu/bridges/elementwise_ops.cc b/lite/kernels/npu/bridges/elementwise_ops.cc index a31a1426dc5dc8c537a05bf44287d7256be7085a..69b77b5def4faf9722fdbd8bd6f2480e67e2c160 100644 --- a/lite/kernels/npu/bridges/elementwise_ops.cc +++ b/lite/kernels/npu/bridges/elementwise_ops.cc @@ -74,45 +74,45 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto axis = op_info->GetAttr("axis"); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Y node - std::shared_ptr y_node = nullptr; - if (graph->HasNode(y_name)) { - y_node = graph->GetNode(y_name); + std::shared_ptr y_node = nullptr; + if (graph->Has(y_name)) { + y_node = graph->Get(y_name); } else { auto y_new_shape = CvtYShape(x_dims, y_dims, axis); - y_node = graph->AddNode(y_name, y_new_shape); + y_node = graph->Add(y_name, *y, y_new_shape); } // Elementwise node - std::shared_ptr elementwise_node = nullptr; + std::shared_ptr elt_node = nullptr; if (op_type == "elementwise_add" || op_type == "fusion_elementwise_add_activation") { - auto elt_node = graph->AddNode(out_name); - elt_node->set_input_x1(*x_node); - elt_node->set_input_x2(*y_node); - elementwise_node = elt_node; + elt_node = graph->Add(out_name); + auto elt_op = elt_node->data(); + elt_op->set_input_x1(*x_node->data()); + elt_op->set_input_x2(*y_node->data()); } else if (op_type == "elementwise_sub") { - auto elt_node = graph->AddNode(out_name); - elt_node->set_input_x1(*x_node); - elt_node->set_input_x2(*y_node); - elementwise_node = elt_node; + elt_node = graph->Add(out_name); + auto elt_op = elt_node->data(); + elt_op->set_input_x1(*x_node->data()); + elt_op->set_input_x2(*y_node->data()); } else if (op_type == "elementwise_mul") { - auto elt_node = graph->AddNode(out_name); - elt_node->set_input_x(*x_node); - elt_node->set_input_y(*y_node); - elementwise_node = elt_node; + elt_node = graph->Add(out_name); + auto elt_op = elt_node->data(); + elt_op->set_input_x(*x_node->data()); + elt_op->set_input_y(*y_node->data()); } else if (op_type == "elementwise_div") { - auto elt_node = graph->AddNode(out_name); - elt_node->set_input_x1(*x_node); - elt_node->set_input_x2(*y_node); - elementwise_node = elt_node; + elt_node = graph->Add(out_name); + auto elt_op = elt_node->data(); + elt_op->set_input_x1(*x_node->data()); + elt_op->set_input_x2(*y_node->data()); } else { LOG(WARNING) << "[NPU] Unsupported op type: " << op_type; return FAILED; @@ -121,11 +121,12 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) { // Act node if (op_type == "fusion_elementwise_add_activation") { auto act_type = op_info->GetAttr("act_type"); - auto act_node = graph->AddNode(out_name); - act_node->set_input_x(*elementwise_node); + auto act_node = graph->Add(out_name); + auto act_op = act_node->data(); + act_op->set_input_x(*elt_node->data()); // TODO(hong19860320) set the coef value for act Ops, such as leaky_relu, // clipped_relu etc. 
- act_node->set_attr_mode(CvtActMode(act_type)); + act_op->set_attr_mode(CvtActMode(act_type)); } return REBUILD_WHEN_SHAPE_CHANGED; } @@ -135,18 +136,18 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, - elementwise_add, +REGISTER_SUBGRAPH_BRIDGE(elementwise_add, + kNPU, paddle::lite::subgraph::npu::ElementwiseConverter); -REGISTER_SUBGRAPH_BRIDGE(NPU, - fusion_elementwise_add_activation, +REGISTER_SUBGRAPH_BRIDGE(fusion_elementwise_add_activation, + kNPU, paddle::lite::subgraph::npu::ElementwiseConverter); -REGISTER_SUBGRAPH_BRIDGE(NPU, - elementwise_sub, +REGISTER_SUBGRAPH_BRIDGE(elementwise_sub, + kNPU, paddle::lite::subgraph::npu::ElementwiseConverter); -REGISTER_SUBGRAPH_BRIDGE(NPU, - elementwise_mul, +REGISTER_SUBGRAPH_BRIDGE(elementwise_mul, + kNPU, paddle::lite::subgraph::npu::ElementwiseConverter); -REGISTER_SUBGRAPH_BRIDGE(NPU, - elementwise_div, +REGISTER_SUBGRAPH_BRIDGE(elementwise_div, + kNPU, paddle::lite::subgraph::npu::ElementwiseConverter); diff --git a/lite/kernels/npu/bridges/fc_op.cc b/lite/kernels/npu/bridges/fc_op.cc index 7b66d545651f5e41c65f36e743d74f0508daf8b3..3d028172154e58c1ed191b4d4eb780e9937458a5 100644 --- a/lite/kernels/npu/bridges/fc_op.cc +++ b/lite/kernels/npu/bridges/fc_op.cc @@ -57,22 +57,24 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) { << " m: " << m << " k: " << k << " n: " << n; // Create input node and reshape it to (m, k, 1, 1) - std::shared_ptr input_node = nullptr; - if (graph->HasNode(input_name)) { - input_node = graph->GetNode(input_name); + std::shared_ptr input_node = nullptr; + if (graph->Has(input_name)) { + input_node = graph->Get(input_name); } else { - input_node = graph->AddNode(input_name, input_dims); + input_node = graph->Add(input_name, *input); } auto reshaped_input_node = - graph->AddNode(input_name + "/reshape"); - reshaped_input_node->set_input_tensor(*input_node); - reshaped_input_node->set_attr_shape({m, k, 1, 1}); - reshaped_input_node->set_attr_axis(0); + graph->Add(input_name + "/reshape"); + auto reshaped_input_op = reshaped_input_node->data(); + reshaped_input_op->set_input_tensor(*input_node->data()); + reshaped_input_op->set_attr_shape({m, k, 1, 1}); + reshaped_input_op->set_attr_axis(0); // Create w const node, set its shape to (n, k, 1, 1) and fill with // the transposed w tensor Tensor transpose_w; transpose_w.Resize({n, k, 1, 1}); + transpose_w.set_persistable(true); auto transpose_w_data = transpose_w.mutable_data(); auto w_data = w->mutable_data(); for (int i = 0; i < k; i++) { @@ -80,29 +82,36 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) { transpose_w_data[j * k + i] = w_data[i * n + j]; } } - auto trans_w_const_node = graph->AddNode(w_name, transpose_w); + auto trans_w_node = graph->Add(w_name, transpose_w); // FC node - auto fc_node = graph->AddNode(out_name + "/fc"); - fc_node->set_input_x(*reshaped_input_node); - fc_node->set_input_w(*trans_w_const_node); + auto fc_node = graph->Add(out_name + "/fc"); + auto fc_op = fc_node->data(); + fc_op->set_input_x(*reshaped_input_node->data()); + fc_op->set_input_w(*trans_w_node->data()); // Add bias node if bias tensor exists if (HasInputArg(op_info, scope, "Bias")) { + std::shared_ptr bias_node = nullptr; auto bias_name = op_info->Input("Bias").front(); - auto bias_type = kernel->GetInputDeclType("Bias"); - CHECK(bias_type->precision() == PRECISION(kFloat)); - CHECK(bias_type->layout() == DATALAYOUT(kNCHW)); - auto bias = 
scope->FindMutableTensor(bias_name); - auto bias_dims = bias->dims(); - CHECK_EQ(bias_dims.production(), n); - auto bias_const_node = graph->AddNode(bias_name, *bias, {1, n, 1, 1}); - fc_node->set_input_b(*bias_const_node); + if (graph->Has(bias_name)) { + bias_node = graph->Get(bias_name); + } else { + auto bias_type = kernel->GetInputDeclType("Bias"); + CHECK(bias_type->precision() == PRECISION(kFloat)); + CHECK(bias_type->layout() == DATALAYOUT(kNCHW)); + auto bias = scope->FindMutableTensor(bias_name); + auto bias_dims = bias->dims(); + CHECK_EQ(bias_dims.production(), n); + bias_node = graph->Add(bias_name, *bias, {1, n, 1, 1}); + } + fc_op->set_input_b(*bias_node->data()); } // Reshape output of FC node from (m, n, 1, 1) to (m, n) - auto reshaped_fc_node = graph->AddNode(out_name); - reshaped_fc_node->set_input_tensor(*fc_node); - reshaped_fc_node->set_attr_shape({m, n}); - reshaped_fc_node->set_attr_axis(0); + auto reshaped_fc_node = graph->Add(out_name); + auto reshaped_fc_op = reshaped_fc_node->data(); + reshaped_fc_op->set_input_tensor(*fc_node->data()); + reshaped_fc_op->set_attr_shape({m, n}); + reshaped_fc_op->set_attr_axis(0); return REBUILD_WHEN_SHAPE_CHANGED; } @@ -111,4 +120,4 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, fc, paddle::lite::subgraph::npu::FCConverter); +REGISTER_SUBGRAPH_BRIDGE(fc, kNPU, paddle::lite::subgraph::npu::FCConverter); diff --git a/lite/kernels/npu/bridges/graph.cc b/lite/kernels/npu/bridges/graph.cc index 48ebfd567320f9b55d104a350a1ba35176fd47b4..ed0abff350cba388f4b4b446516032ac3a80d63f 100644 --- a/lite/kernels/npu/bridges/graph.cc +++ b/lite/kernels/npu/bridges/graph.cc @@ -21,26 +21,52 @@ namespace lite { namespace subgraph { namespace npu { -// Const node -std::shared_ptr Graph::AddNode(const std::string& name, - const Tensor& tensor, - std::vector shape, - PrecisionType precision, - DataLayoutType layout) { - auto node = AddNode(name, precision, layout); - node->set_attr_value(CvtTensor(tensor, shape, precision, layout)); +int Graph::Add(const std::string& name, std::shared_ptr node) { + auto it = nodes_.find(name); + if (it != nodes_.end()) { + // Only variable node can be shared with the same name + if (!node->is_var() || !it->second.back()->is_var()) { + LOG(FATAL) << "[NPU] Const or data node " << name << " is redefined."; + return -1; + } + } else { + auto ret = nodes_.insert( + std::make_pair(name, std::vector>())); + CHECK(ret.second); + it = ret.first; + } + it->second.push_back(node); + return it->second.size(); +} + +// Const or data node +std::shared_ptr Graph::Add(const std::string& name, + const Tensor& tensor, + std::vector shape, + PrecisionType precision, + DataLayoutType layout) { + std::shared_ptr node = nullptr; + if (tensor.persistable()) { + // Const node + node = Add(name, precision, layout); + node->data()->set_attr_value( + CvtTensor(tensor, shape, precision, layout)); + } else { + // Data node + node = Add(name, shape, precision, layout); + } return node; } // Data node -std::shared_ptr Graph::AddNode(const std::string& name, - std::vector shape, - PrecisionType precision, - DataLayoutType layout) { - auto node = AddNode(name); +std::shared_ptr Graph::Add(const std::string& name, + std::vector shape, + PrecisionType precision, + DataLayoutType layout) { + auto node = Add(name, precision, layout); ge::TensorDesc desc( ge::Shape(shape), CvtDataLayoutType(layout), CvtPrecisionType(precision)); - node->update_input_desc_x(desc); + 
node->data()->update_input_desc_x(desc); return node; } diff --git a/lite/kernels/npu/bridges/graph.h b/lite/kernels/npu/bridges/graph.h index 9b6e49c5e9e12ae6fc5ba38192ccfe487fcc0c07..ac0f85d3e1d4bc0d73504dce2af8ab8655a86726 100644 --- a/lite/kernels/npu/bridges/graph.h +++ b/lite/kernels/npu/bridges/graph.h @@ -19,7 +19,7 @@ #include #include #include -#include "ai_ddk_lib/include/graph/op/all_ops.h" +#include "graph/op/all_ops.h" #include "lite/core/op_lite.h" #include "lite/core/tensor.h" @@ -28,94 +28,97 @@ namespace lite { namespace subgraph { namespace npu { -// Type of graph nodes -class Type { +// Graph and node is defined to collect all of converted HiAI IR nodes +class Node { public: - Type(PrecisionType precision = PRECISION(kFloat), - DataLayoutType layout = DATALAYOUT(kNCHW), - bool persistable = false) - : precision_(precision), layout_(layout), persistable_(persistable) {} - + enum class Role { + kVar = 0, + kConst, + kData, + }; + + Node(std::shared_ptr data, + PrecisionType precision, + DataLayoutType layout, + Role role) + : data_(data), precision_(precision), layout_(layout), role_(role) {} + Node(PrecisionType precision, DataLayoutType layout, Role role) + : precision_(precision), layout_(layout), role_(role) {} + + void set_data(std::shared_ptr data) { data_ = data; } void set_precision(PrecisionType precision) { precision_ = precision; } void set_layout(DataLayoutType layout) { layout_ = layout; } - bool set_persistable(bool persistable) { persistable_ = persistable; } + void set_role(Role role) { role_ = role; } + template + std::shared_ptr data() { + return std::static_pointer_cast(data_); + } + std::shared_ptr data() { return data_; } PrecisionType precision() const { return precision_; } DataLayoutType layout() const { return layout_; } - bool persistable() const { return persistable_; } + bool is_var() const { return role_ == Role::kVar; } + bool is_const() const { return role_ == Role::kConst; } + bool is_data() const { return role_ == Role::kData; } private: + std::shared_ptr data_{nullptr}; PrecisionType precision_{PRECISION(kFloat)}; DataLayoutType layout_{DATALAYOUT(kNCHW)}; - bool persistable_{false}; + Role role_{Role::kVar}; }; -// Graph to collect all of converted HiAI IR nodes class Graph { public: + int Add(const std::string& name, std::shared_ptr node); + + // Variable, const or data node template - std::shared_ptr AddNode(const std::string& name, - PrecisionType precision = PRECISION(kFloat), - DataLayoutType layout = DATALAYOUT(kNCHW)) { - auto unique_name = [&](const std::string& key) { - int idx = 1; - auto it = counts_.find(key); - if (it == counts_.end()) { - counts_.insert(std::make_pair(key, idx)); - } else { - idx = ++(it->second); - } - return key + "_" + std::to_string(idx); - }; - bool persistable = typeid(T) == typeid(ge::op::Const); - auto it = nodes_.find(name); - if (it != nodes_.end()) { - // Only variable can rebind the name - CHECK(!it->second.second.persistable() && !persistable) - << "[NPU] Node " << name << " redefined."; - // Generate a new unique name as the key to bind the origin node: - // new_name->node - nodes_.insert(std::make_pair(unique_name(name + "_var"), it->second)); - nodes_.erase(it); + std::shared_ptr Add(const std::string& name, + PrecisionType precision = PRECISION(kFloat), + DataLayoutType layout = DATALAYOUT(kNCHW)) { + Node::Role role = Node::Role::kVar; + if (typeid(T) == typeid(ge::op::Const)) { + role = Node::Role::kConst; + } else if (typeid(T) == typeid(ge::op::Data)) { + role = Node::Role::kData; } - 
// Create a new node and bind with the name: name->new_node - auto node = std::make_shared(unique_name(name + "_op")); - nodes_.insert(std::make_pair( - name, std::make_pair(node, Type(precision, layout, persistable)))); + auto node = std::make_shared(precision, layout, role); + auto idx = Add(name, node); + CHECK_GE(idx, 1); + // Generate a unique name for the created HiAI IR + node->set_data(std::make_shared(name + "__" + std::to_string(idx))); return node; } - // Const node - std::shared_ptr AddNode( - const std::string& name, - const Tensor& tensor, - PrecisionType precision = PRECISION(kFloat), - DataLayoutType layout = DATALAYOUT(kNCHW)) { - return AddNode(name, tensor, tensor.dims().Vectorize(), precision, layout); + // Const or data node + std::shared_ptr Add(const std::string& name, + const Tensor& tensor, + std::vector shape, + PrecisionType precision = PRECISION(kFloat), + DataLayoutType layout = DATALAYOUT(kNCHW)); + + std::shared_ptr Add(const std::string& name, + const Tensor& tensor, + PrecisionType precision = PRECISION(kFloat), + DataLayoutType layout = DATALAYOUT(kNCHW)) { + return Add(name, tensor, tensor.dims().Vectorize(), precision, layout); } - std::shared_ptr AddNode( - const std::string& name, - const Tensor& tensor, - std::vector shape, - PrecisionType precision = PRECISION(kFloat), - DataLayoutType layout = DATALAYOUT(kNCHW)); - - std::shared_ptr AddNode( - const std::string& name, - const Tensor& tensor, - DDim dims, - PrecisionType precision = PRECISION(kFloat), - DataLayoutType layout = DATALAYOUT(kNCHW)) { - return AddNode(name, tensor, dims.Vectorize(), precision, layout); + std::shared_ptr Add(const std::string& name, + const Tensor& tensor, + DDim dims, + PrecisionType precision = PRECISION(kFloat), + DataLayoutType layout = DATALAYOUT(kNCHW)) { + return Add(name, tensor, dims.Vectorize(), precision, layout); } + // Const node template - std::shared_ptr AddNode( - const std::string& name, - const std::vector& data, - std::vector shape = {}, - DataLayoutType layout = DATALAYOUT(kNCHW)) { + std::shared_ptr Add(const std::string& name, + const std::vector& data, + std::vector shape = {}, + DataLayoutType layout = DATALAYOUT(kNCHW)) { const std::type_info& info = typeid(T); PrecisionType precision = PRECISION(kFloat); if (info == typeid(float)) { @@ -138,78 +141,66 @@ class Graph { } Tensor tensor; tensor.Resize(shape); + tensor.set_persistable(true); std::memcpy(reinterpret_cast(tensor.mutable_data()), reinterpret_cast(data.data()), data.size() * sizeof(T)); - return AddNode(name, tensor, precision, layout); + return Add(name, tensor, precision, layout); } template - std::shared_ptr AddNode( - const std::string& name, - const std::vector& data, - DDim dims, - DataLayoutType layout = DATALAYOUT(kNCHW)) { - return AddNode(name, data, dims.Vectorize(), layout); + std::shared_ptr Add(const std::string& name, + const std::vector& data, + DDim dims, + DataLayoutType layout = DATALAYOUT(kNCHW)) { + return Add(name, data, dims.Vectorize(), layout); } template - std::shared_ptr AddNode( - const std::string& name, - T value, - std::vector shape = {1}, - DataLayoutType layout = DATALAYOUT(kNCHW)) { + std::shared_ptr Add(const std::string& name, + T value, + std::vector shape = {1}, + DataLayoutType layout = DATALAYOUT(kNCHW)) { int64_t size = 1; for (auto i : shape) { size *= i; } std::vector data(size, value); - return AddNode(name, data, shape, layout); + return Add(name, data, shape, layout); } template - std::shared_ptr AddNode( - const std::string& name, - T value, 
- DDim dims, - DataLayoutType layout = DATALAYOUT(kNCHW)) { - return AddNode(name, value, dims.Vectorize(), layout); + std::shared_ptr Add(const std::string& name, + T value, + DDim dims, + DataLayoutType layout = DATALAYOUT(kNCHW)) { + return Add(name, value, dims.Vectorize(), layout); } // Data node - std::shared_ptr AddNode( - const std::string& name, - std::vector shape, - PrecisionType precision = PRECISION(kFloat), - DataLayoutType layout = DATALAYOUT(kNCHW)); - - std::shared_ptr AddNode( - const std::string& name, - DDim dims, - PrecisionType precision = PRECISION(kFloat), - DataLayoutType layout = DATALAYOUT(kNCHW)) { - return AddNode(name, dims.Vectorize(), precision, layout); - } - - std::shared_ptr GetNode(std::string name) { - CHECK(HasNode(name)) << "[NPU] Node " << name << " not found."; - return nodes_.at(name).first; + std::shared_ptr Add(const std::string& name, + std::vector shape, + PrecisionType precision = PRECISION(kFloat), + DataLayoutType layout = DATALAYOUT(kNCHW)); + + std::shared_ptr Add(const std::string& name, + DDim dims, + PrecisionType precision = PRECISION(kFloat), + DataLayoutType layout = DATALAYOUT(kNCHW)) { + return Add(name, dims.Vectorize(), precision, layout); } - const Type& GetType(const std::string& name) { - CHECK(HasNode(name)) << "[NPU] Node " << name << " not found."; - return nodes_.at(name).second; + std::shared_ptr Get(std::string name) { + CHECK(Has(name)) << "[NPU] Node " << name << " not found."; + return nodes_.at(name).back(); } - bool HasNode(const std::string& name) { + bool Has(const std::string& name) { return nodes_.find(name) != nodes_.end(); } private: - std::unordered_map, Type>> - nodes_; - std::unordered_map counts_; + std::unordered_map>> nodes_; }; } // namespace npu diff --git a/lite/kernels/npu/bridges/interpolate_op.cc b/lite/kernels/npu/bridges/interpolate_op.cc index f95ebc347a86051aba23e0d61799ab2efb5c5567..238200abf3d37c8967a371f6ada4d6b6d6223b4d 100644 --- a/lite/kernels/npu/bridges/interpolate_op.cc +++ b/lite/kernels/npu/bridges/interpolate_op.cc @@ -55,11 +55,11 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) { "supported in HiAI DDK"; // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Priority: OutSize > scale > out_h/out_w @@ -71,17 +71,18 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // Update out_h and out_w and create out_size node if has OutSize - std::shared_ptr out_size_node = nullptr; + std::shared_ptr out_size_node = nullptr; if (HasInputArg(op_info, scope, "OutSize")) { auto out_size_name = op_info->Input("OutSize").front(); auto out_size_type = kernel->GetInputDeclType("OutSize"); CHECK(out_size_type->precision() == PRECISION(kInt32)); CHECK(out_size_type->layout() == DATALAYOUT(kNCHW)); - if (graph->HasNode(out_size_name)) { - out_size_node = graph->GetNode(out_size_name); + if (graph->Has(out_size_name)) { + out_size_node = graph->Get(out_size_name); } else { auto out_size = scope->FindMutableTensor(out_size_name); CHECK_EQ(out_size->numel(), 2); + CHECK(out_size->persistable()); auto out_size_data = out_size->mutable_data(); // Update out_h and out_w if has OutSize out_h = out_size_data[0]; @@ -97,22 +98,25 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) { << " is too large, 
should not exceed " << largest_multiple << " in HiAI DDK"; } - out_size_node = graph->AddNode(out_name + "/out_size", - std::vector({out_h, out_w})); + out_size_node = + graph->Add(out_name + "/out_size", std::vector({out_h, out_w})); } if (interp_method == "bilinear") { - auto bilinear_interp_node = - graph->AddNode(out_name); - bilinear_interp_node->set_input_x(*x_node); - bilinear_interp_node->set_input_size(*out_size_node); - bilinear_interp_node->set_attr_align_corners(align_corners); + auto bilinear_interp_node = graph->Add(out_name); + auto bilinear_interp_op = + bilinear_interp_node->data(); + bilinear_interp_op->set_input_x(*x_node->data()); + bilinear_interp_op->set_input_size(*out_size_node->data()); + bilinear_interp_op->set_attr_align_corners(align_corners); } else if (interp_method == "nearest") { auto nearest_interp_node = - graph->AddNode(out_name); - nearest_interp_node->set_input_image(*x_node); - nearest_interp_node->set_input_size(*out_size_node); - nearest_interp_node->set_attr_align_corners(align_corners); + graph->Add(out_name); + auto nearest_interp_op = + nearest_interp_node->data(); + nearest_interp_op->set_input_image(*x_node->data()); + nearest_interp_op->set_input_size(*out_size_node->data()); + nearest_interp_op->set_attr_align_corners(align_corners); } else { LOG(WARNING) << "[NPU] Unsupported interpolate method: " << interp_method; return FAILED; @@ -125,9 +129,9 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, - bilinear_interp, +REGISTER_SUBGRAPH_BRIDGE(bilinear_interp, + kNPU, paddle::lite::subgraph::npu::InterpolateConverter); -REGISTER_SUBGRAPH_BRIDGE(NPU, - nearest_interp, +REGISTER_SUBGRAPH_BRIDGE(nearest_interp, + kNPU, paddle::lite::subgraph::npu::InterpolateConverter); diff --git a/lite/kernels/npu/bridges/mul_op.cc b/lite/kernels/npu/bridges/mul_op.cc index f63b6826b98cdf5f2f8207376d367ee5f89e0c51..27df45819537faed291e108cc8a78a9a9de202cf 100644 --- a/lite/kernels/npu/bridges/mul_op.cc +++ b/lite/kernels/npu/bridges/mul_op.cc @@ -56,45 +56,46 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) { << "[NPU] columns of X must be equal with rows of Y"; int n = y_dims.Slice(y_num_col_dims, y_dims.size()).production(); VLOG(3) << "m:" << m << ",n:" << n << ",k:" << k; - VLOG(3) << "x_name:" << x_name << ", is data: " << graph->HasNode(x_name); - VLOG(3) << "y_name:" << y_name << ", is data: " << graph->HasNode(y_name); - CHECK(graph->HasNode(x_name)) + VLOG(3) << "x_name:" << x_name << ", is data: " << graph->Has(x_name); + VLOG(3) << "y_name:" << y_name << ", is data: " << graph->Has(y_name); + CHECK(graph->Has(x_name)) << "[NPU] MatMul in HiAI DDK only support X is data, Y is const yet."; // X node which supports persistable and non-persistable tensor, and // reshape to (m, k) - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); - auto reshaped_x_node = graph->AddNode(x_name + "/reshape"); - reshaped_x_node->set_input_tensor(*x_node); - reshaped_x_node->set_attr_shape({m, k}); - reshaped_x_node->set_attr_axis(0); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); + auto reshaped_x_node = graph->Add(x_name + "/reshape"); + auto reshaped_x_op = reshaped_x_node->data(); + reshaped_x_op->set_input_tensor(*x_node->data()); + reshaped_x_op->set_attr_shape({m, k}); + reshaped_x_op->set_attr_axis(0); x_node = reshaped_x_node; } else { - auto x_const_node = 
graph->AddNode(x_name, *x, {m, k}); - x_node = x_const_node; + x_node = graph->Add(x_name, *x, {m, k}); } // Y node which only supports persistable tensor, and reshape to // (k,n) - std::shared_ptr y_node = nullptr; - if (graph->HasNode(y_name)) { - y_node = graph->GetNode(y_name); - auto reshaped_y_node = graph->AddNode(y_name + "/reshape"); - reshaped_y_node->set_input_tensor(*y_node); - reshaped_y_node->set_attr_shape({k, n}); - reshaped_y_node->set_attr_axis(0); + std::shared_ptr y_node = nullptr; + if (graph->Has(y_name)) { + y_node = graph->Get(y_name); + auto reshaped_y_node = graph->Add(y_name + "/reshape"); + auto reshaped_y_op = reshaped_y_node->data(); + reshaped_y_op->set_input_tensor(*y_node->data()); + reshaped_y_op->set_attr_shape({k, n}); + reshaped_y_op->set_attr_axis(0); y_node = reshaped_y_node; } else { - auto y_const_node = graph->AddNode(y_name, *y, {k, n}); - y_node = y_const_node; + y_node = graph->Add(y_name, *y, {k, n}); } // Matmul node - auto mul_node = graph->AddNode(out_name); - mul_node->set_input_x1(*x_node); - mul_node->set_input_x2(*y_node); + auto mul_node = graph->Add(out_name); + auto mul_op = mul_node->data(); + mul_op->set_input_x1(*x_node->data()); + mul_op->set_input_x2(*y_node->data()); return REBUILD_WHEN_SHAPE_CHANGED; } @@ -103,4 +104,4 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, mul, paddle::lite::subgraph::npu::MulConverter); +REGISTER_SUBGRAPH_BRIDGE(mul, kNPU, paddle::lite::subgraph::npu::MulConverter); diff --git a/lite/kernels/npu/bridges/pad2d_op.cc b/lite/kernels/npu/bridges/pad2d_op.cc index 451f48b1df6c7fedf0505ad5c6165f2d43514966..e6852da78776808dfa7d0f9a75b1b2fe077190b6 100644 --- a/lite/kernels/npu/bridges/pad2d_op.cc +++ b/lite/kernels/npu/bridges/pad2d_op.cc @@ -45,35 +45,34 @@ int Pad2dConverter(void* ctx, OpLite* op, KernelBase* kernel) { CHECK_EQ(padding.size(), 4); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Padding node int xds = x_dims.size(); padding.insert(padding.begin(), xds * 2 - 4, 0); - auto padding_const_node = - graph->AddNode(out_name + "/padding", padding, {xds, 2}); + auto padding_node = graph->Add(out_name + "/padding", padding, {xds, 2}); // Pad node - auto pad2d_node = graph->AddNode(out_name); - pad2d_node->set_input_x(*x_node); - pad2d_node->set_input_padding(*padding_const_node); + auto pad2d_node = graph->Add(out_name); + auto pad2d_op = pad2d_node->data(); + pad2d_op->set_input_x(*x_node->data()); + pad2d_op->set_input_padding(*padding_node->data()); auto mode = op_info->GetAttr("mode"); if (mode == "constant") { // Pad value node auto pad_value = op_info->GetAttr("pad_value"); - auto pad_value_const_node = - graph->AddNode(out_name + "/pad_value", pad_value); - pad2d_node->set_input_constant_values(*pad_value_const_node); - pad2d_node->set_attr_T(0); // type of pad_value: 0:float 3:int32 - pad2d_node->set_attr_mode(0); + auto pad_value_node = graph->Add(out_name + "/pad_value", pad_value); + pad2d_op->set_input_constant_values(*pad_value_node->data()); + pad2d_op->set_attr_T(0); // type of pad_value: 0:float 3:int32 + pad2d_op->set_attr_mode(0); } else if (mode == "reflect") { LOG(WARNING) << "[NPU] pad mode " << mode << " isn't supported in HiAI DDK"; - 
pad2d_node->set_attr_mode(1);
+    pad2d_op->set_attr_mode(1);
     return FAILED;
   } else {
     LOG(WARNING) << "[NPU] pad mode " << mode << " isn't supported in HiAI DDK";
@@ -87,6 +86,6 @@ int Pad2dConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 }  // namespace lite
 }  // namespace paddle
 
-REGISTER_SUBGRAPH_BRIDGE(NPU,
-                         pad2d,
+REGISTER_SUBGRAPH_BRIDGE(pad2d,
+                         kNPU,
                          paddle::lite::subgraph::npu::Pad2dConverter);
diff --git a/lite/kernels/npu/bridges/paddle_use_bridges.h b/lite/kernels/npu/bridges/paddle_use_bridges.h
index d6fc5353385fd86b9ae682769684720f4e0ad57c..9e68771c601fae706735464dd75f1bade4b2cdb4 100644
--- a/lite/kernels/npu/bridges/paddle_use_bridges.h
+++ b/lite/kernels/npu/bridges/paddle_use_bridges.h
@@ -14,40 +14,40 @@
 
 #pragma once
 
-USE_SUBGRAPH_BRIDGE(NPU, sigmoid);
-USE_SUBGRAPH_BRIDGE(NPU, relu);
-USE_SUBGRAPH_BRIDGE(NPU, tanh);
-USE_SUBGRAPH_BRIDGE(NPU, relu_clipped);
-USE_SUBGRAPH_BRIDGE(NPU, leaky_relu);
-USE_SUBGRAPH_BRIDGE(NPU, softsign);
-USE_SUBGRAPH_BRIDGE(NPU, hard_sigmoid);
+USE_SUBGRAPH_BRIDGE(sigmoid, kNPU);
+USE_SUBGRAPH_BRIDGE(relu, kNPU);
+USE_SUBGRAPH_BRIDGE(tanh, kNPU);
+USE_SUBGRAPH_BRIDGE(relu_clipped, kNPU);
+USE_SUBGRAPH_BRIDGE(leaky_relu, kNPU);
+USE_SUBGRAPH_BRIDGE(softsign, kNPU);
+USE_SUBGRAPH_BRIDGE(hard_sigmoid, kNPU);
 
-USE_SUBGRAPH_BRIDGE(NPU, batch_norm);
-USE_SUBGRAPH_BRIDGE(NPU, concat);
-USE_SUBGRAPH_BRIDGE(NPU, conv2d);
-USE_SUBGRAPH_BRIDGE(NPU, depthwise_conv2d);
-USE_SUBGRAPH_BRIDGE(NPU, conv2d_transpose);
+USE_SUBGRAPH_BRIDGE(batch_norm, kNPU);
+USE_SUBGRAPH_BRIDGE(concat, kNPU);
+USE_SUBGRAPH_BRIDGE(conv2d, kNPU);
+USE_SUBGRAPH_BRIDGE(depthwise_conv2d, kNPU);
+USE_SUBGRAPH_BRIDGE(conv2d_transpose, kNPU);
 
-USE_SUBGRAPH_BRIDGE(NPU, elementwise_add);
-USE_SUBGRAPH_BRIDGE(NPU, fusion_elementwise_add_activation);
-USE_SUBGRAPH_BRIDGE(NPU, elementwise_sub);
-USE_SUBGRAPH_BRIDGE(NPU, elementwise_mul);
-USE_SUBGRAPH_BRIDGE(NPU, elementwise_div);
+USE_SUBGRAPH_BRIDGE(elementwise_add, kNPU);
+USE_SUBGRAPH_BRIDGE(fusion_elementwise_add_activation, kNPU);
+USE_SUBGRAPH_BRIDGE(elementwise_sub, kNPU);
+USE_SUBGRAPH_BRIDGE(elementwise_mul, kNPU);
+USE_SUBGRAPH_BRIDGE(elementwise_div, kNPU);
 
-USE_SUBGRAPH_BRIDGE(NPU, fc);
-USE_SUBGRAPH_BRIDGE(NPU, bilinear_interp);
-USE_SUBGRAPH_BRIDGE(NPU, nearest_interp);
-USE_SUBGRAPH_BRIDGE(NPU, mul);
-USE_SUBGRAPH_BRIDGE(NPU, pad2d);
-USE_SUBGRAPH_BRIDGE(NPU, pool2d);
-USE_SUBGRAPH_BRIDGE(NPU, reduce_mean);
-USE_SUBGRAPH_BRIDGE(NPU, reshape);
-USE_SUBGRAPH_BRIDGE(NPU, reshape2);
-USE_SUBGRAPH_BRIDGE(NPU, scale);
-USE_SUBGRAPH_BRIDGE(NPU, shuffle_channel);
-USE_SUBGRAPH_BRIDGE(NPU, softmax);
-USE_SUBGRAPH_BRIDGE(NPU, split);
-USE_SUBGRAPH_BRIDGE(NPU, sqrt);
-USE_SUBGRAPH_BRIDGE(NPU, square);
-USE_SUBGRAPH_BRIDGE(NPU, transpose);
-USE_SUBGRAPH_BRIDGE(NPU, transpose2);
+USE_SUBGRAPH_BRIDGE(fc, kNPU);
+USE_SUBGRAPH_BRIDGE(bilinear_interp, kNPU);
+USE_SUBGRAPH_BRIDGE(nearest_interp, kNPU);
+USE_SUBGRAPH_BRIDGE(mul, kNPU);
+USE_SUBGRAPH_BRIDGE(pad2d, kNPU);
+USE_SUBGRAPH_BRIDGE(pool2d, kNPU);
+USE_SUBGRAPH_BRIDGE(reduce_mean, kNPU);
+USE_SUBGRAPH_BRIDGE(reshape, kNPU);
+USE_SUBGRAPH_BRIDGE(reshape2, kNPU);
+USE_SUBGRAPH_BRIDGE(scale, kNPU);
+USE_SUBGRAPH_BRIDGE(shuffle_channel, kNPU);
+USE_SUBGRAPH_BRIDGE(softmax, kNPU);
+USE_SUBGRAPH_BRIDGE(split, kNPU);
+USE_SUBGRAPH_BRIDGE(sqrt, kNPU);
+USE_SUBGRAPH_BRIDGE(square, kNPU);
+USE_SUBGRAPH_BRIDGE(transpose, kNPU);
+USE_SUBGRAPH_BRIDGE(transpose2, kNPU);
diff --git a/lite/kernels/npu/bridges/pool_op.cc b/lite/kernels/npu/bridges/pool_op.cc
index
8b108fc4ee07308c95a3a5d53173d14c8fa457d5..42349d18398b2f95003c859b15a32b707f97742a 100644 --- a/lite/kernels/npu/bridges/pool_op.cc +++ b/lite/kernels/npu/bridges/pool_op.cc @@ -48,11 +48,11 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto paddings = op_info->GetAttr>("paddings"); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // pool mode @@ -109,19 +109,19 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // Pooling node - auto pool_node = graph->AddNode(out_name); - pool_node->set_input_x(*x_node); - pool_node->set_attr_mode(mode); - pool_node->set_attr_pad_mode(pad_mode); - pool_node->set_attr_global_pooling(global_pooling); - pool_node->set_attr_window( - ge::AttrValue::LIST_INT(ksize.begin(), ksize.end())); - pool_node->set_attr_pad(ge::AttrValue::LIST_INT{ + auto pool_node = graph->Add(out_name); + auto pool_op = pool_node->data(); + pool_op->set_input_x(*x_node->data()); + pool_op->set_attr_mode(mode); + pool_op->set_attr_pad_mode(pad_mode); + pool_op->set_attr_global_pooling(global_pooling); + pool_op->set_attr_window(ge::AttrValue::LIST_INT(ksize.begin(), ksize.end())); + pool_op->set_attr_pad(ge::AttrValue::LIST_INT{ paddings[0], paddings[1], paddings[2], paddings[3]}); - pool_node->set_attr_stride( + pool_op->set_attr_stride( ge::AttrValue::LIST_INT(strides.begin(), strides.end())); - pool_node->set_attr_ceil_mode(ceil_mode); - // pool_node->set_attr_data_mode(data_mode); + pool_op->set_attr_ceil_mode(ceil_mode); + // pool_op->set_attr_data_mode(data_mode); return REBUILD_WHEN_SHAPE_CHANGED; } @@ -130,6 +130,6 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, - pool2d, +REGISTER_SUBGRAPH_BRIDGE(pool2d, + kNPU, paddle::lite::subgraph::npu::PoolConverter); diff --git a/lite/kernels/npu/bridges/reduce_mean_op.cc b/lite/kernels/npu/bridges/reduce_mean_op.cc index 6c7f29fb271956937a2c71ce79f07a4931fb9a5f..29f065675c742978638fbbb68c71dd451ca35f37 100644 --- a/lite/kernels/npu/bridges/reduce_mean_op.cc +++ b/lite/kernels/npu/bridges/reduce_mean_op.cc @@ -52,29 +52,30 @@ int ReduceMeanConverter(void* ctx, OpLite* op, KernelBase* kernel) { std::sort(dim.begin(), dim.end()); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Using ReduceSum + Scale to implement ReduceMean // Dim node - auto dim_const_node = graph->AddNode(out_name + "/dim", dim); + auto dim_node = graph->Add(out_name + "/dim", dim); // Reduce Sum node - auto reduce_sum_node = - graph->AddNode(out_name + "/reducesum"); - reduce_sum_node->set_input_x(*x_node); - reduce_sum_node->set_input_w(*dim_const_node); - reduce_sum_node->set_attr_keep_dims(keep_dim); + auto reduce_sum_node = graph->Add(out_name + "/reducesum"); + auto reduce_sum_op = reduce_sum_node->data(); + reduce_sum_op->set_input_x(*x_node->data()); + reduce_sum_op->set_input_w(*dim_node->data()); + reduce_sum_op->set_attr_keep_dims(keep_dim); // Scale node - auto scale_node = graph->AddNode(out_name); - scale_node->set_input_x(*reduce_sum_node); 
- scale_node->set_attr_axis(1); + auto scale_node = graph->Add(out_name); + auto scale_op = scale_node->data(); + scale_op->set_input_x(*reduce_sum_node->data()); + scale_op->set_attr_axis(1); // Add filter node(fill with scale) float scale = 1; @@ -95,9 +96,8 @@ int ReduceMeanConverter(void* ctx, OpLite* op, KernelBase* kernel) { remove(scale_bias_shape.begin(), scale_bias_shape.end(), kDelFlag), scale_bias_shape.end()); } - auto filter_const_node = - graph->AddNode(out_name + "/filter", scale, scale_bias_shape); - scale_node->set_input_filter(*filter_const_node); + auto filter_node = graph->Add(out_name + "/filter", scale, scale_bias_shape); + scale_op->set_input_filter(*filter_node->data()); return REBUILD_WHEN_SHAPE_CHANGED; } @@ -106,6 +106,6 @@ int ReduceMeanConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, - reduce_mean, +REGISTER_SUBGRAPH_BRIDGE(reduce_mean, + kNPU, paddle::lite::subgraph::npu::ReduceMeanConverter); diff --git a/lite/kernels/npu/bridges/registry.cc b/lite/kernels/npu/bridges/registry.cc index 5f89bcb313ded16c207b305e3265a0a60cd81ecb..39181ccee97372f9952acea1e0523d4100e0843b 100644 --- a/lite/kernels/npu/bridges/registry.cc +++ b/lite/kernels/npu/bridges/registry.cc @@ -24,27 +24,27 @@ Registry& Registry::Instance() { return x; } -void Registry::Insert(const std::string& dev_type, - const std::string& op_type, +void Registry::Insert(const std::string& op_type, + const std::string& target, const cvt_func_type& cvt_func_name) { - auto it = map_.find(dev_type); + auto it = map_.find(target); if (it == map_.end()) { map_.insert(std::make_pair( - dev_type, std::unordered_map())); + target, std::unordered_map())); } - map_.at(dev_type).insert(std::make_pair(op_type, cvt_func_name)); + map_.at(target).insert(std::make_pair(op_type, cvt_func_name)); } -const cvt_func_type& Registry::Select(const std::string& dev_type, - const std::string& op_type) const { - return map_.at(dev_type).at(op_type); +const cvt_func_type& Registry::Select(const std::string& op_type, + const std::string& target) const { + return map_.at(target).at(op_type); } -bool Registry::Exists(const std::string& dev_type, - const std::string& op_type) const { - bool found = map_.find(dev_type) != map_.end(); +bool Registry::Exists(const std::string& op_type, + const std::string& target) const { + bool found = map_.find(target) != map_.end(); if (found) { - found = map_.at(dev_type).find(op_type) != map_.at(dev_type).end(); + found = map_.at(target).find(op_type) != map_.at(target).end(); } return found; } diff --git a/lite/kernels/npu/bridges/registry.h b/lite/kernels/npu/bridges/registry.h index 5198a3f8f2c0a684f15c8d7a27e5ecb3902ed43a..77132b6fc38785db274a555cafbc5eb96a4ca0b5 100644 --- a/lite/kernels/npu/bridges/registry.h +++ b/lite/kernels/npu/bridges/registry.h @@ -42,12 +42,12 @@ class Registry { public: static Registry& Instance(); - void Insert(const std::string& dev_type, - const std::string& op_type, + void Insert(const std::string& op_type, + const std::string& target, const cvt_func_type& cvt_func_name); - const cvt_func_type& Select(const std::string& dev_type, - const std::string& op_type) const; - bool Exists(const std::string& dev_type, const std::string& op_type) const; + const cvt_func_type& Select(const std::string& op_type, + const std::string& target) const; + bool Exists(const std::string& op_type, const std::string& target) const; Registry() = default; private: @@ -73,18 +73,18 @@ class Registry { 
__test_global_namespace_##uniq_name##__>::value, \ msg) -#define REGISTER_SUBGRAPH_BRIDGE(dev_type, op_type, cvt_func_name) \ +#define REGISTER_SUBGRAPH_BRIDGE(op_type__, target__, cvt_func_name) \ STATIC_ASSERT_JITKERNEL_GLOBAL_NAMESPACE( \ - __reg_subgraph_bridge_##dev_type##_##op_type##__, \ + __reg_subgraph_bridge_##op_type__##_##target__##__, \ "REGISTER_SUBGRAPH_BRIDGE must be called in global namespace only " \ "once!"); \ - int __reg_subgraph_bridge_##dev_type##_##op_type##_Insert() { \ + int __reg_subgraph_bridge_##op_type__##_##target__##_Insert() { \ paddle::lite::subgraph::Registry::Instance().Insert( \ - #dev_type, #op_type, cvt_func_name); \ + #op_type__, #target__, cvt_func_name); \ return 0; \ } -#define USE_SUBGRAPH_BRIDGE(dev_type, op_type) \ - extern int __reg_subgraph_bridge_##dev_type##_##op_type##_Insert(); \ - static int __reg_subgraph_bridge_##dev_type##_##op_type##_Insert_return \ - UNUSED = __reg_subgraph_bridge_##dev_type##_##op_type##_Insert(); +#define USE_SUBGRAPH_BRIDGE(op_type__, target__) \ + extern int __reg_subgraph_bridge_##op_type__##_##target__##_Insert(); \ + static int __reg_subgraph_bridge_##op_type__##_##target__##_Insert_return \ + UNUSED = __reg_subgraph_bridge_##op_type__##_##target__##_Insert(); diff --git a/lite/kernels/npu/bridges/reshape_op.cc b/lite/kernels/npu/bridges/reshape_op.cc index d5100dee4a415e28d5479fed9100ea2afc69ef02..14f256006699573c662f6a6ac5a29bbba74f9d9b 100644 --- a/lite/kernels/npu/bridges/reshape_op.cc +++ b/lite/kernels/npu/bridges/reshape_op.cc @@ -44,16 +44,17 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { CHECK(out_type->layout() == DATALAYOUT(kNCHW)); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Reshape node - auto reshape_node = graph->AddNode(out_name); - reshape_node->set_input_tensor(*x_node); + auto reshape_node = graph->Add(out_name); + auto reshape_op = reshape_node->data(); + reshape_op->set_input_tensor(*x_node->data()); // Read shape from "ShapeTensor"(input), or "Shape"(input), or "shape"(attr) if (HasInputArg(op_info, scope, "ShapeTensor")) { @@ -64,9 +65,9 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { // auto actual_shape_type = kernel->GetInputDeclType("Shape"); // CHECK(actual_shape_type->precision() == PRECISION(kInt32)); // CHECK(actual_shape_type->layout() == DATALAYOUT(kNCHW)); - std::shared_ptr actual_shape_node = nullptr; - if (graph->HasNode(actual_shape_name)) { - actual_shape_node = graph->GetNode(actual_shape_name); + std::shared_ptr actual_shape_node = nullptr; + if (graph->Has(actual_shape_name)) { + actual_shape_node = graph->Get(actual_shape_name); } else { auto actual_shape = scope->FindMutableTensor(actual_shape_name); auto actual_shape_dims = actual_shape->dims(); @@ -81,12 +82,11 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { "but Shape has " << out_shape.size(); } - auto actual_shape_const_node = - graph->AddNode(actual_shape_name, - std::vector(out_shape.begin(), out_shape.end())); - actual_shape_node = actual_shape_const_node; + actual_shape_node = + graph->Add(actual_shape_name, + std::vector(out_shape.begin(), out_shape.end())); } - reshape_node->set_input_w(*actual_shape_node); + reshape_op->set_input_w(*actual_shape_node->data()); } else { auto shape = 
op_info->GetAttr>("shape"); auto out_dims = lite::operators::ValidateShape(shape, x_dims); @@ -96,7 +96,7 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { "but shape has " << out_shape.size(); } - reshape_node->set_attr_shape( + reshape_op->set_attr_shape( ge::AttrValue::LIST_INT(out_shape.begin(), out_shape.end())); } @@ -117,9 +117,10 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { // auto xshape_type = kernel->GetOutputDeclType("XShape"); // CHECK(xshape_type->precision() == PRECISION(kFloat)); // CHECK(xshape_type->layout() == DATALAYOUT(kNCHW)); - auto xshape_node = graph->AddNode(xshape_name); - xshape_node->set_input_tensor(*x_node); - xshape_node->set_attr_shape( + auto xshape_node = graph->Add(xshape_name); + auto xshape_op = xshape_node->data(); + xshape_op->set_input_tensor(*x_node->data()); + xshape_op->set_attr_shape( ge::AttrValue::LIST_INT(xshape_dims.begin(), xshape_dims.end())); } return REBUILD_WHEN_SHAPE_CHANGED; @@ -130,9 +131,9 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, - reshape, +REGISTER_SUBGRAPH_BRIDGE(reshape, + kNPU, paddle::lite::subgraph::npu::ReshapeConverter); -REGISTER_SUBGRAPH_BRIDGE(NPU, - reshape2, +REGISTER_SUBGRAPH_BRIDGE(reshape2, + kNPU, paddle::lite::subgraph::npu::ReshapeConverter); diff --git a/lite/kernels/npu/bridges/scale_op.cc b/lite/kernels/npu/bridges/scale_op.cc index ca04996faf82e875bbacc0119f431d48d4e1f657..d0139a9e2fd580f3143e9ad9809ed924e6e949a4 100644 --- a/lite/kernels/npu/bridges/scale_op.cc +++ b/lite/kernels/npu/bridges/scale_op.cc @@ -37,12 +37,15 @@ int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) { CHECK(x_type->layout() == DATALAYOUT(kNCHW)); auto x = scope->FindMutableTensor(x_name); auto x_dims = x->dims(); - CHECK_GE(x_dims.size(), 2); + auto x_rank = x_dims.size(); + CHECK_GE(x_rank, 2); auto out_name = op_info->Output("Out").front(); auto out_type = kernel->GetOutputDeclType("Out"); CHECK(out_type->precision() == PRECISION(kFloat)); CHECK(out_type->layout() == DATALAYOUT(kNCHW)); - std::vector scale_bias_shape = {x_dims[1]}; + // HiAI only support [n, c, 1, 1] for the shape of scale and bias + std::vector scale_bias_shape = { + 1, x_rank < 3 ? 
1 : x_dims[x_rank - 3], 1, 1}; float scale = op_info->GetAttr("scale"); float bias = op_info->GetAttr("bias"); bool bias_after_scale = op_info->GetAttr("bias_after_scale"); @@ -51,29 +54,28 @@ int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x, CvtShape(x_dims)); } // Scale node - auto scale_node = graph->AddNode(out_name); - scale_node->set_input_x(*x_node); - scale_node->set_attr_axis(1); + auto scale_node = graph->Add(out_name); + auto scale_op = scale_node->data(); + scale_op->set_input_x(*x_node->data()); + scale_op->set_attr_axis(1); // Add filter node(fill with scale) - auto filter_const_node = - graph->AddNode(out_name + "/filter", scale, scale_bias_shape); - scale_node->set_input_filter(*filter_const_node); + auto filter_node = graph->Add(out_name + "/filter", scale, scale_bias_shape); + scale_op->set_input_filter(*filter_node->data()); // Add bias node(fill with bias) if (fabs(bias) > 1e-6f) { - auto bias_const_node = - graph->AddNode(out_name + "/bias", bias, scale_bias_shape); - scale_node->set_input_bias(*bias_const_node); - scale_node->set_attr_has_bias_value(true); + auto bias_node = graph->Add(out_name + "/bias", bias, scale_bias_shape); + scale_op->set_input_bias(*bias_node->data()); + scale_op->set_attr_has_bias_value(true); } return REBUILD_WHEN_SHAPE_CHANGED; } @@ -83,6 +85,6 @@ int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, - scale, +REGISTER_SUBGRAPH_BRIDGE(scale, + kNPU, paddle::lite::subgraph::npu::ScaleConverter); diff --git a/lite/kernels/npu/bridges/shuffle_channel_op.cc b/lite/kernels/npu/bridges/shuffle_channel_op.cc index 47469e1506d2d49d8db2ac08c38e7b66762666a0..0552bd2382041bde155b661abc053e8680dbcd3e 100644 --- a/lite/kernels/npu/bridges/shuffle_channel_op.cc +++ b/lite/kernels/npu/bridges/shuffle_channel_op.cc @@ -44,17 +44,19 @@ int ShuffleChannelConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto group = op_info->GetAttr("group"); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Shuffle Channel node - auto shuffle_channel_node = graph->AddNode(out_name); - shuffle_channel_node->set_input_x(*x_node); - shuffle_channel_node->set_attr_group(group); + auto shuffle_channel_node = graph->Add(out_name); + auto shuffle_channel_op = + shuffle_channel_node->data(); + shuffle_channel_op->set_input_x(*x_node->data()); + shuffle_channel_op->set_attr_group(group); return SUCCESS; } @@ -63,6 +65,6 @@ int ShuffleChannelConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, - shuffle_channel, +REGISTER_SUBGRAPH_BRIDGE(shuffle_channel, + kNPU, paddle::lite::subgraph::npu::ShuffleChannelConverter); diff --git a/lite/kernels/npu/bridges/softmax_op.cc b/lite/kernels/npu/bridges/softmax_op.cc index 01d8b0a944aeb806eb28447402629993c8e13c62..24bbb790e08b4b0ff675173af8faad3b07f8f2e0 100644 --- a/lite/kernels/npu/bridges/softmax_op.cc +++ 
b/lite/kernels/npu/bridges/softmax_op.cc @@ -37,29 +37,34 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) { CHECK(x_type->layout() == DATALAYOUT(kNCHW)); auto x = scope->FindMutableTensor(x_name); auto x_dims = x->dims(); + auto x_rank = x_dims.size(); auto out_name = op_info->Output("Out").front(); auto out_type = kernel->GetOutputDeclType("Out"); CHECK(out_type->precision() == PRECISION(kFloat)); CHECK(out_type->layout() == DATALAYOUT(kNCHW)); auto axis = op_info->GetAttr("axis"); - if (x_dims.size() > 3) { - CHECK(!(axis == 2 && x_dims[3] > 1)) - << "[NPU] Unsupported softmax params: axis = " << axis - << " :x_w = " << x_dims[3]; + if (axis < 0) { + axis += x_rank; + } + if (axis == 2 && x_rank > 3 && x_dims[3] != 1) { + LOG(WARNING) << "[NPU] Unsupported softmax params: axis = " << axis + << " :x_w = " << x_dims[3]; + return FAILED; } // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Softmax node - auto softmax_node = graph->AddNode(out_name); - softmax_node->set_input_x(*x_node); - softmax_node->set_attr_axis(axis); + auto softmax_node = graph->Add(out_name); + auto softmax_op = softmax_node->data(); + softmax_op->set_input_x(*x_node->data()); + softmax_op->set_attr_axis(axis); return REBUILD_WHEN_SHAPE_CHANGED; } @@ -68,6 +73,6 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, - softmax, +REGISTER_SUBGRAPH_BRIDGE(softmax, + kNPU, paddle::lite::subgraph::npu::SoftmaxConverter); diff --git a/lite/kernels/npu/bridges/split_op.cc b/lite/kernels/npu/bridges/split_op.cc index 597de04d5bc520fab2c76218a3b625cc885a22e3..2cdf49fd540bc40ceaaa45df4a6ac65bf94f172a 100644 --- a/lite/kernels/npu/bridges/split_op.cc +++ b/lite/kernels/npu/bridges/split_op.cc @@ -47,33 +47,34 @@ int SplitConverter(void* ctx, OpLite* op, KernelBase* kernel) { int64_t sections_num = static_cast(sections.size()); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Split node - auto split_node = graph->AddNode(op_type + "/" + x_name); - split_node->set_input_x(*x_node); - split_node->set_attr_axis(static_cast(axis)); + auto split_node = graph->Add(op_type + "/" + x_name); + auto split_op = split_node->data(); + split_op->set_input_x(*x_node->data()); + split_op->set_attr_axis(static_cast(axis)); if (num > 0) { - split_node->set_attr_output_num(static_cast(num)); + split_op->set_attr_output_num(static_cast(num)); } else { - split_node->set_attr_output_num(sections_num); + split_op->set_attr_output_num(sections_num); auto size_split = ge::AttrValue::LIST_INT(sections.begin(), sections.end()); - split_node->set_attr_size_split(size_split); + split_op->set_attr_size_split(size_split); } - split_node->create_dynamic_output_y(out_names.size()); + split_op->create_dynamic_output_y(out_names.size()); int idx = 1; for (auto& out_name : out_names) { - auto zero_const_node = - graph->AddNode(out_name + "/zero" + std::to_string(idx), 0); - auto add_node = graph->AddNode(out_name); - add_node->set_input_x1(*split_node, "y" + 
std::to_string(idx)); - add_node->set_input_x2(*zero_const_node); + auto zero_node = graph->Add(out_name + "/zero" + std::to_string(idx), 0); + auto add_node = graph->Add(out_name); + auto add_op = add_node->data(); + add_op->set_input_x1(*split_node->data(), "y" + std::to_string(idx)); + add_op->set_input_x2(*zero_node->data()); idx++; } return REBUILD_WHEN_SHAPE_CHANGED; @@ -84,6 +85,6 @@ int SplitConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, - split, +REGISTER_SUBGRAPH_BRIDGE(split, + kNPU, paddle::lite::subgraph::npu::SplitConverter); diff --git a/lite/kernels/npu/bridges/sqrt_op.cc b/lite/kernels/npu/bridges/sqrt_op.cc index 2ee58862fb8c78ceca10ca2af3435d85e31d68fd..e8fde2272a28823763f096e087be5f024734cf1b 100644 --- a/lite/kernels/npu/bridges/sqrt_op.cc +++ b/lite/kernels/npu/bridges/sqrt_op.cc @@ -43,16 +43,17 @@ int SqrtConverter(void* ctx, OpLite* op, KernelBase* kernel) { CHECK(out_type->layout() == DATALAYOUT(kNCHW)); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Sqrt node - auto sqrt_node = graph->AddNode(out_name); - sqrt_node->set_input_x(*x_node); + auto sqrt_node = graph->Add(out_name); + auto sqrt_op = sqrt_node->data(); + sqrt_op->set_input_x(*x_node->data()); return SUCCESS; } @@ -61,4 +62,6 @@ int SqrtConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, sqrt, paddle::lite::subgraph::npu::SqrtConverter); +REGISTER_SUBGRAPH_BRIDGE(sqrt, + kNPU, + paddle::lite::subgraph::npu::SqrtConverter); diff --git a/lite/kernels/npu/bridges/square_op.cc b/lite/kernels/npu/bridges/square_op.cc index 3f6676c8a8fa87452d3961a22f3a8d6c2f9619ad..f03c7690cb490556fe6b26a132454ca109f41310 100644 --- a/lite/kernels/npu/bridges/square_op.cc +++ b/lite/kernels/npu/bridges/square_op.cc @@ -43,16 +43,17 @@ int SquareConverter(void* ctx, OpLite* op, KernelBase* kernel) { CHECK(out_type->layout() == DATALAYOUT(kNCHW)); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Square node - auto square_node = graph->AddNode(out_name); - square_node->set_input_x(*x_node); + auto square_node = graph->Add(out_name); + auto square_op = square_node->data(); + square_op->set_input_x(*x_node->data()); return SUCCESS; } @@ -61,6 +62,6 @@ int SquareConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, - square, +REGISTER_SUBGRAPH_BRIDGE(square, + kNPU, paddle::lite::subgraph::npu::SquareConverter); diff --git a/lite/kernels/npu/bridges/transpose_op.cc b/lite/kernels/npu/bridges/transpose_op.cc index 70449dac7acd0622d454ac742d0c16d85c1bc954..f758ababaca16a9187ca5ea416c84704b09fc19f 100644 --- a/lite/kernels/npu/bridges/transpose_op.cc +++ b/lite/kernels/npu/bridges/transpose_op.cc @@ -41,19 +41,20 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto axis = op_info->GetAttr>("axis"); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = 
graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Transpose node - auto transpose_node = graph->AddNode(out_name); - transpose_node->set_input_x(*x_node); - auto w_const_node = graph->AddNode(out_name + "/w", 1.0f); - transpose_node->set_input_w(*w_const_node); - transpose_node->set_attr_order( + auto transpose_node = graph->Add(out_name); + auto transpose_op = transpose_node->data(); + transpose_op->set_input_x(*x_node->data()); + auto w_node = graph->Add(out_name + "/w", 1.0f); + transpose_op->set_input_w(*w_node->data()); + transpose_op->set_attr_order( ge::AttrValue::LIST_INT(axis.begin(), axis.end())); return SUCCESS; } @@ -63,9 +64,9 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, - transpose, +REGISTER_SUBGRAPH_BRIDGE(transpose, + kNPU, paddle::lite::subgraph::npu::TransposeConverter); -REGISTER_SUBGRAPH_BRIDGE(NPU, - transpose2, +REGISTER_SUBGRAPH_BRIDGE(transpose2, + kNPU, paddle::lite::subgraph::npu::TransposeConverter); diff --git a/lite/kernels/npu/bridges/unsqueeze_op.cc b/lite/kernels/npu/bridges/unsqueeze_op.cc index 8ff95d4ed805f8e125ec0ed7f6fa7f94e02a4f91..e69453d7506e4548bc9cf292430f643d5ce2ba9e 100644 --- a/lite/kernels/npu/bridges/unsqueeze_op.cc +++ b/lite/kernels/npu/bridges/unsqueeze_op.cc @@ -45,17 +45,18 @@ int UnsqueezeConverter(void* ctx, OpLite* op, KernelBase* kernel) { << "[NPU] unsqueeze not support axes from tensor now"; // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Unsqueeze node - auto unsqueeze_node = graph->AddNode(out_name); - unsqueeze_node->set_input_tensor(*x_node); - unsqueeze_node->set_attr_shape( + auto unsqueeze_node = graph->Add(out_name); + auto unsqueeze_op = unsqueeze_node->data(); + unsqueeze_op->set_input_tensor(*x_node->data()); + unsqueeze_op->set_attr_shape( ge::AttrValue::LIST_INT(out_shape.begin(), out_shape.end())); return REBUILD_WHEN_SHAPE_CHANGED; } @@ -65,9 +66,9 @@ int UnsqueezeConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(NPU, - unsqueeze, +REGISTER_SUBGRAPH_BRIDGE(unsqueeze, + kNPU, paddle::lite::subgraph::npu::UnsqueezeConverter); -REGISTER_SUBGRAPH_BRIDGE(NPU, - unsqueeze2, +REGISTER_SUBGRAPH_BRIDGE(unsqueeze2, + kNPU, paddle::lite::subgraph::npu::UnsqueezeConverter); diff --git a/lite/kernels/npu/bridges/utility.cc b/lite/kernels/npu/bridges/utility.cc index f79936c5d7b5350d96abc5617d856b595951eb71..4efcd4feae41403945d3410ee94856412575d79a 100644 --- a/lite/kernels/npu/bridges/utility.cc +++ b/lite/kernels/npu/bridges/utility.cc @@ -85,6 +85,22 @@ ge::Format CvtDataLayoutType(DataLayoutType itype) { return otype; } +std::vector CvtShape(const std::vector& in_shape) { + std::vector out_shape; + // Padding the shape to 4-dimensions(NCHW) + for (int i = 0; i < 4 - in_shape.size(); i++) { + out_shape.push_back(1); + } + for (int i = 0; i < in_shape.size(); i++) { + out_shape.push_back(in_shape[i]); + } + return out_shape; +} + +std::vector CvtShape(const DDim& in_dims) { + return CvtShape(in_dims.Vectorize()); +} + ge::TensorPtr CvtTensor(const Tensor& 
in_tensor, std::vector out_shape, PrecisionType in_precision, diff --git a/lite/kernels/npu/bridges/utility.h b/lite/kernels/npu/bridges/utility.h index e8300a0472d8b672bab467fe1fbba7a2113ba254..0336cf272a24c8828922ea683f877f9fd165a65c 100644 --- a/lite/kernels/npu/bridges/utility.h +++ b/lite/kernels/npu/bridges/utility.h @@ -19,12 +19,12 @@ #include #include #include -#include "ai_ddk_lib/include/graph/buffer.h" -#include "ai_ddk_lib/include/graph/graph.h" -#include "ai_ddk_lib/include/graph/model.h" -#include "ai_ddk_lib/include/graph/op/all_ops.h" -#include "ai_ddk_lib/include/graph/operator.h" -#include "ai_ddk_lib/include/graph/operator_reg.h" +#include "graph/buffer.h" +#include "graph/graph.h" +#include "graph/model.h" +#include "graph/op/all_ops.h" +#include "graph/operator.h" +#include "graph/operator_reg.h" #include "lite/core/op_lite.h" #include "lite/utils/macros.h" @@ -70,59 +70,16 @@ ge::DataType CvtPrecisionType(PrecisionType itype); ge::Format CvtDataLayoutType(DataLayoutType itype); +// Padding the shape to 4-dimensions(NCHW) for HiAI +std::vector CvtShape(const std::vector& in_shape); + +std::vector CvtShape(const DDim& in_dims); + ge::TensorPtr CvtTensor(const Tensor& in_tensor, std::vector out_shape = {}, PrecisionType in_precision = PRECISION(kFloat), DataLayoutType in_layout = DATALAYOUT(kNCHW)); -template -ge::TensorPtr CreateTensorAndFillData(const std::vector& data, - std::vector shape = {}, - ge::Format format = ge::FORMAT_NCHW) { - const std::type_info& info = typeid(T); - ge::DataType type = ge::DT_FLOAT; - if (info == typeid(float)) { - type = ge::DT_FLOAT; - } else if (info == typeid(int8_t)) { - type = ge::DT_INT8; - } else if (info == typeid(int16_t)) { - type = ge::DT_INT16; - } else if (info == typeid(int32_t)) { - type = ge::DT_INT32; - } else if (info == typeid(int64_t)) { - type = ge::DT_INT64; - } else { - LOG(FATAL) << "[NPU] Unknow value type " << info.name(); - } - if (shape.empty()) { - shape = {static_cast(data.size())}; - } else { - int size = 1; - for (auto i : shape) { - size *= i; - } - CHECK_EQ(data.size(), size); - } - ge::TensorDesc desc(ge::Shape(shape), format, type); - ge::TensorPtr tensor = std::make_shared(); - tensor->SetTensorDesc(desc); - tensor->SetData(reinterpret_cast(data.data()), - data.size() * sizeof(T)); - return tensor; -} - -template -ge::TensorPtr CreateTensorAndFillData(T value, - std::vector shape = {1}, - ge::Format format = ge::FORMAT_NCHW) { - int64_t size = 1; - for (auto i : shape) { - size *= i; - } - std::vector data(size, value); - return CreateTensorAndFillData(data, shape, format); -} - int CvtActMode(std::string act_type); } // namespace npu diff --git a/lite/kernels/npu/subgraph_compute.cc b/lite/kernels/npu/subgraph_compute.cc index d9b191950668660ae2b76b70ac2b5c12aece92c0..6f32099274f449ab51ce0f4751f99d33c3d7cd72 100644 --- a/lite/kernels/npu/subgraph_compute.cc +++ b/lite/kernels/npu/subgraph_compute.cc @@ -16,7 +16,7 @@ #include #include #include -#include "ai_ddk_lib/include/hiai_ir_build.h" +#include "hiai_ir_build.h" // NOLINT #include "lite/backends/npu/device.h" #include "lite/core/op_registry.h" #include "lite/kernels/npu/bridges/graph.h" @@ -39,13 +39,13 @@ int SubgraphEngine::BuildDeviceProgram() { op->CheckShape(); op->InferShape(); std::string op_type = op->op_info()->Type(); - if (!bridges.Exists("NPU", op_type)) { + if (!bridges.Exists(op_type, "kNPU")) { return subgraph::FAILED; } auto kernel = inst.kernel(); - status |= bridges.Select("NPU", op_type)(reinterpret_cast(&graph), - 
const_cast(op), - const_cast(kernel)); + status |= bridges.Select(op_type, "kNPU")(reinterpret_cast(&graph), + const_cast(op), + const_cast(kernel)); if (subgraph::CHECK_FAILED(status)) { return subgraph::FAILED; } @@ -57,26 +57,26 @@ int SubgraphEngine::BuildDeviceProgram() { std::vector device_inodes; std::vector device_onodes; for (auto& input_name : input_names_) { - if (graph.HasNode(input_name)) { - if (!graph.GetType(input_name).persistable()) { - device_inodes.push_back(*graph.GetNode(input_name)); + if (graph.Has(input_name)) { + if (graph.Get(input_name)->is_data()) { + device_inodes.push_back(*graph.Get(input_name)->data()); device_inames_.push_back(input_name); } else { LOG(WARNING) << "[NPU] Input node " << input_name - << " is skipped because it is a persistable node."; + << " is ignored because it is not a data node."; } } else { LOG(WARNING) << "[NPU] Input node " << input_name - << " is skipped because it does not exist."; + << " is ignored because it does not exist."; } } for (auto& output_name : output_names_) { - if (graph.HasNode(output_name)) { - device_onodes.push_back(*graph.GetNode(output_name)); + if (graph.Has(output_name)) { + device_onodes.push_back(*graph.Get(output_name)->data()); device_onames_.push_back(output_name); } else { LOG(WARNING) << "[NPU] Output node " << output_name - << " is skipped because it does not exist."; + << " is ignored because it does not exist."; } } CHECK(!device_inames_.empty()) @@ -108,14 +108,14 @@ int SubgraphEngine::BuildDeviceProgram() { origin_otensors_.resize(device_onames_.size()); device_otensors_.resize(device_onames_.size()); for (int i = 0; i < device_inames_.size(); i++) { - auto type = graph.GetType(device_inames_[i]); - auto precision = type.precision(); - auto layout = type.layout(); + auto node = graph.Get(device_inames_[i]); + auto precision = node->precision(); + auto layout = node->layout(); origin_itensors_[i] = scope_->FindMutableTensor(device_inames_[i]); CHECK(origin_itensors_[i]); origin_idims_[i] = origin_itensors_[i]->dims(); - VLOG(3) << "[NPU] Inputs[" << i - << "] precision: " << PrecisionToStr(precision) + VLOG(3) << "[NPU] Inputs[" << i << "] name: " << device_inames_[i] + << " precision: " << PrecisionToStr(precision) << " layout: " << DataLayoutToStr(layout) << " dims: {" << device_idims[i].GetNumber() << "," << device_idims[i].GetChannel() << "," @@ -129,14 +129,14 @@ int SubgraphEngine::BuildDeviceProgram() { device_itensors_[i]->Init(&(device_idims[i])); } for (int i = 0; i < device_onames_.size(); i++) { - auto type = graph.GetType(device_onames_[i]); - auto precision = type.precision(); - auto layout = type.layout(); + auto node = graph.Get(device_onames_[i]); + auto precision = node->precision(); + auto layout = node->layout(); origin_otensors_[i] = scope_->FindMutableTensor(device_onames_[i]); CHECK(origin_otensors_[i]); origin_odims_[i] = origin_otensors_[i]->dims(); - VLOG(3) << "[NPU] Outputs[" << i - << "] precision: " << PrecisionToStr(precision) + VLOG(3) << "[NPU] Outputs[" << i << "] name: " << device_onames_[i] + << " precision: " << PrecisionToStr(precision) << " layout: " << DataLayoutToStr(layout) << " dims: {" << device_odims[i].GetNumber() << "," << device_odims[i].GetChannel() << "," diff --git a/lite/kernels/npu/subgraph_compute.h b/lite/kernels/npu/subgraph_compute.h index 27b4a36cfeadf6cca328fb9c980d53c9c5e79095..2cdc4a0e62fe748a8b1d1dfb8f90c17b1d36e869 100644 --- a/lite/kernels/npu/subgraph_compute.h +++ b/lite/kernels/npu/subgraph_compute.h @@ -17,7 +17,7 @@ #include 
#include #include -#include "ai_ddk_lib/include/HiAiModelManagerService.h" +#include "HiAiModelManagerService.h" #include "lite/core/kernel.h" #include "lite/kernels/npu/bridges/engine.h" #include "lite/kernels/npu/bridges/registry.h" diff --git a/lite/kernels/xpu/bridges/act_op.cc b/lite/kernels/xpu/bridges/act_op.cc index f674af84caac466cfe2b06e32360dacccd2bdf5e..e3d4588aa2aed1268a8e15f654019031a5202542 100644 --- a/lite/kernels/xpu/bridges/act_op.cc +++ b/lite/kernels/xpu/bridges/act_op.cc @@ -43,20 +43,21 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) { CHECK(out_type->layout() == DATALAYOUT(kNCHW)); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Act node if (op_type == "relu") { - graph->AddNode(out_name, graph->builder_.CreateRelu(*x_node)); + graph->Add(out_name, graph->builder_.CreateRelu(*x_node->data())); } else if (op_type == "tanh") { - graph->AddNode(out_name, graph->builder_.CreateUnaryOp("tanh", *x_node)); + graph->Add(out_name, + graph->builder_.CreateUnaryOp("tanh", *x_node->data())); } else if (op_type == "gelu") { - graph->AddNode(out_name, graph->builder_.CreateGelu(*x_node)); + graph->Add(out_name, graph->builder_.CreateGelu(*x_node->data())); } else { // TODO(hong19860320) supports more activation ops LOG(WARNING) << "[XPU] Unsupported activation type " << op_type; @@ -70,6 +71,6 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, relu, paddle::lite::subgraph::xpu::ActConverter); -REGISTER_SUBGRAPH_BRIDGE(XPU, tanh, paddle::lite::subgraph::xpu::ActConverter); -REGISTER_SUBGRAPH_BRIDGE(XPU, gelu, paddle::lite::subgraph::xpu::ActConverter); +REGISTER_SUBGRAPH_BRIDGE(relu, kXPU, paddle::lite::subgraph::xpu::ActConverter); +REGISTER_SUBGRAPH_BRIDGE(tanh, kXPU, paddle::lite::subgraph::xpu::ActConverter); +REGISTER_SUBGRAPH_BRIDGE(gelu, kXPU, paddle::lite::subgraph::xpu::ActConverter); diff --git a/lite/kernels/xpu/bridges/batch_norm_op.cc b/lite/kernels/xpu/bridges/batch_norm_op.cc index 980f241660c6cec6856d738197407dd866e36ed3..20478dbdfbfdf305989a1c3b3832ed1a29fe62f8 100644 --- a/lite/kernels/xpu/bridges/batch_norm_op.cc +++ b/lite/kernels/xpu/bridges/batch_norm_op.cc @@ -64,28 +64,28 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto epsilon = op_info->GetAttr("epsilon"); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Scale, Bias, Mean, Variance node - auto scale_const_node = graph->AddNode(scale_name, *scale); - auto bias_const_node = graph->AddNode(bias_name, *bias); - auto mean_const_node = graph->AddNode(mean_name, *mean); - auto variance_const_node = graph->AddNode(variance_name, *variance); + auto scale_node = graph->Add(scale_name, *scale); + auto bias_node = graph->Add(bias_name, *bias); + auto mean_node = graph->Add(mean_name, *mean); + auto variance_node = graph->Add(variance_name, *variance); // Batch Norm node and extract the first field as the output node - auto batch_norm_node = graph->builder_.CreateBatchNorm(*x_node, - 
*scale_const_node, - *bias_const_node, - *mean_const_node, - *variance_const_node, + auto batch_norm_data = graph->builder_.CreateBatchNorm(*x_node->data(), + *scale_node->data(), + *bias_node->data(), + *mean_node->data(), + *variance_node->data(), 1, epsilon); - graph->AddNode(y_name, graph->builder_.GetField(batch_norm_node, 0)); + graph->Add(y_name, graph->builder_.GetField(batch_norm_data, 0)); return SUCCESS; } @@ -94,6 +94,6 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, - batch_norm, +REGISTER_SUBGRAPH_BRIDGE(batch_norm, + kXPU, paddle::lite::subgraph::xpu::BatchNormConverter); diff --git a/lite/kernels/xpu/bridges/conv_op.cc b/lite/kernels/xpu/bridges/conv_op.cc index 5e9e5448a1f9bcf4d4fd17b8e8a1d5529c14c59d..fe9c598847977e87d87950c3850d3e1d074958b2 100644 --- a/lite/kernels/xpu/bridges/conv_op.cc +++ b/lite/kernels/xpu/bridges/conv_op.cc @@ -61,11 +61,11 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) { CHECK_EQ(dilations.size(), 2L); // Input node - std::shared_ptr input_node = nullptr; - if (graph->HasNode(input_name)) { - input_node = graph->GetNode(input_name); + std::shared_ptr input_node = nullptr; + if (graph->Has(input_name)) { + input_node = graph->Get(input_name); } else { - input_node = graph->AddNode(input_name, input_dims); + input_node = graph->Add(input_name, *input); } if (paddings.size() == 2L) { @@ -99,7 +99,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) { DDim output_dims(output_shape); // Filter node - auto filter_const_node = graph->AddNode(filter_name, *filter); + auto filter_node = graph->Add(filter_name, *filter); // Conv node auto conv_attrs = xtcl::make_node(); @@ -114,9 +114,9 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) { conv_attrs->out_layout = ""; // conv_attrs->out_dtype = ""; auto conv_node = - graph->AddNode(output_name, - graph->builder_.CreateConv2D( - *input_node, *filter_const_node, conv_attrs)); + graph->Add(output_name, + graph->builder_.CreateConv2D( + *input_node->data(), *filter_node->data(), conv_attrs)); // Add bias node if exists bias // supports the bias nodes with the following dimensions @@ -149,30 +149,27 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) { << " isn't supported in conv2d Op when output dimension is " << output_dims; } - std::shared_ptr bias_node = nullptr; - if (graph->HasNode(bias_name)) { - // Bias node from input node - bias_node = graph->GetNode(bias_name); + std::shared_ptr bias_node = nullptr; + if (graph->Has(bias_name)) { + bias_node = graph->Get(bias_name); } else { - // Bias node with const data - bias_node = graph->AddNode(bias_name, *bias, bias_shape); + bias_node = graph->Add(bias_name, *bias, bias_shape); } - std::shared_ptr add_node = nullptr; if (is_channel_bias) { - add_node = graph->AddNode( - output_name, - graph->builder_.CreateBiasAdd(*conv_node, 1, *bias_node)); + conv_node = graph->Add(output_name, + graph->builder_.CreateBiasAdd( + *conv_node->data(), 1, *bias_node->data())); } else { - add_node = graph->AddNode( - output_name, - graph->builder_.CreateBinaryOp("add", *conv_node, *bias_node)); + conv_node = + graph->Add(output_name, + graph->builder_.CreateBinaryOp( + "add", *conv_node->data(), *bias_node->data())); } - conv_node = add_node; } if (fuse_relu) { // Append relu node if fuse_relu is true - graph->AddNode(output_name, graph->builder_.CreateRelu(*conv_node)); + graph->Add(output_name, 
graph->builder_.CreateRelu(*conv_node->data())); } return REBUILD_WHEN_SHAPE_CHANGED; } @@ -182,9 +179,9 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, - conv2d, +REGISTER_SUBGRAPH_BRIDGE(conv2d, + kXPU, paddle::lite::subgraph::xpu::ConvConverter); -REGISTER_SUBGRAPH_BRIDGE(XPU, - depthwise_conv2d, +REGISTER_SUBGRAPH_BRIDGE(depthwise_conv2d, + kXPU, paddle::lite::subgraph::xpu::ConvConverter); diff --git a/lite/kernels/xpu/bridges/dropout_op.cc b/lite/kernels/xpu/bridges/dropout_op.cc index ae81facd536042c2e6f3bd273fe89a14938fb7bc..df869e17ff5626f03d6eb988a1687bb51c75d440 100644 --- a/lite/kernels/xpu/bridges/dropout_op.cc +++ b/lite/kernels/xpu/bridges/dropout_op.cc @@ -46,21 +46,21 @@ int DropoutConverter(void* ctx, OpLite* op, KernelBase* kernel) { op_info->GetAttr("dropout_implementation"); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Dropout node if (dropout_implementation == "downgrade_in_infer") { - graph->AddNode( - out_name, - graph->builder_.CreateScale(*x_node, 1.f - dropout_prob, 0.0f, false)); + graph->Add(out_name, + graph->builder_.CreateScale( + *x_node->data(), 1.f - dropout_prob, 0.0f, false)); } else if (dropout_implementation == "upscale_in_train") { - graph->AddNode(out_name, - graph->builder_.CreateScale(*x_node, 1.0f, 0.0f, false)); + graph->Add(out_name, + graph->builder_.CreateScale(*x_node->data(), 1.0f, 0.0f, false)); } else { LOG(WARNING) << "[XPU] Unsupported dropout_implementation == " << dropout_implementation << " for dropout"; @@ -74,6 +74,6 @@ int DropoutConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, - dropout, +REGISTER_SUBGRAPH_BRIDGE(dropout, + kXPU, paddle::lite::subgraph::xpu::DropoutConverter); diff --git a/lite/kernels/xpu/bridges/elementwise_ops.cc b/lite/kernels/xpu/bridges/elementwise_ops.cc index 49a42c55d66fc72eb62f3c04cb53a2efbba89238..7fcae312b9776afa7e3b1cbd1bd17bd25b2e4aab 100644 --- a/lite/kernels/xpu/bridges/elementwise_ops.cc +++ b/lite/kernels/xpu/bridges/elementwise_ops.cc @@ -50,29 +50,31 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto axis = op_info->GetAttr("axis"); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Y node - std::shared_ptr y_node = nullptr; - if (graph->HasNode(y_name)) { - y_node = graph->GetNode(y_name); + std::shared_ptr y_node = nullptr; + if (graph->Has(y_name)) { + y_node = graph->Get(y_name); } else { - y_node = graph->AddNode(y_name, y_dims); + y_node = graph->Add(y_name, *y); } // Elementwise node - std::shared_ptr elementwise_node = nullptr; + std::shared_ptr elt_node = nullptr; if (y_dims.size() == 1) { - elementwise_node = graph->AddNode( - out_name, graph->builder_.CreateBiasAdd(*x_node, axis, *y_node)); + elt_node = graph->Add( + out_name, + graph->builder_.CreateBiasAdd(*x_node->data(), axis, *y_node->data())); } else if (x_dims.size() == y_dims.size()) { - elementwise_node = graph->AddNode( - out_name, 
graph->builder_.CreateBinaryOp("add", *x_node, *y_node)); + elt_node = graph->Add(out_name, + graph->builder_.CreateBinaryOp( + "add", *x_node->data(), *y_node->data())); } else { LOG(WARNING) << "[XPU] elementwise_add only support y of one dimension, or x " @@ -88,6 +90,6 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, - elementwise_add, +REGISTER_SUBGRAPH_BRIDGE(elementwise_add, + kXPU, paddle::lite::subgraph::xpu::ElementwiseConverter); diff --git a/lite/kernels/xpu/bridges/gather_op.cc b/lite/kernels/xpu/bridges/gather_op.cc index 06d1c67b0d1419192e4c8ed6219f79a8c010a06b..b427769f1ecae29ba8fe9740baf074e87c41e8c1 100644 --- a/lite/kernels/xpu/bridges/gather_op.cc +++ b/lite/kernels/xpu/bridges/gather_op.cc @@ -54,38 +54,39 @@ int GatherConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto out_dims = out->dims(); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Index node - std::shared_ptr index_node = nullptr; - if (graph->HasNode(index_name)) { - index_node = graph->GetNode(index_name); + std::shared_ptr index_node = nullptr; + if (graph->Has(index_name)) { + index_node = graph->Get(index_name); } else { - index_node = graph->AddNode( - index_name, index_dims, index_type->precision(), index_type->layout()); + index_node = graph->Add( + index_name, *index, index_type->precision(), index_type->layout()); } // Flatten index node if (index_dims.size() != 1) { index_node = - graph->AddNode(index_name + "/reshape", - graph->builder_.CreateReshape(*index_node, {-1}), - index_type->precision(), - index_type->layout()); + graph->Add(index_name + "/reshape", + graph->builder_.CreateReshape(*index_node->data(), {-1}), + index_type->precision(), + index_type->layout()); } // Reshape the gather node with the inferred shape as the output node - auto gather_node = graph->AddNode( - out_name, - graph->builder_.CreateGather(*x_node, *index_node, /* axis= */ 0)); + auto gather_node = + graph->Add(out_name, + graph->builder_.CreateGather( + *x_node->data(), *index_node->data(), /* axis= */ 0)); if (out_dims.size() != 2) { - graph->AddNode(out_name, - graph->builder_.CreateReshape( - *gather_node, CvtShape(out_dims))); + graph->Add(out_name, + graph->builder_.CreateReshape( + *gather_node->data(), CvtShape(out_dims))); } return SUCCESS; } @@ -95,6 +96,6 @@ int GatherConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, - gather, +REGISTER_SUBGRAPH_BRIDGE(gather, + kXPU, paddle::lite::subgraph::xpu::GatherConverter); diff --git a/lite/kernels/xpu/bridges/graph.cc b/lite/kernels/xpu/bridges/graph.cc index 1691e4b0c50d6fe1d606a0d744a42a4afeae1aa8..4a310e62bd3def52d56d8d39fc7c0350e74fff78 100644 --- a/lite/kernels/xpu/bridges/graph.cc +++ b/lite/kernels/xpu/bridges/graph.cc @@ -21,71 +21,71 @@ namespace lite { namespace subgraph { namespace xpu { -std::shared_ptr Graph::AddNode(const std::string& name, - const xtcl::xExpr& layer, - PrecisionType precision, - DataLayoutType layout) { - auto unique_name = [&](const std::string& key) { - int idx = 1; - auto it = counts_.find(key); - if (it == counts_.end()) { - counts_.insert(std::make_pair(key, idx)); - } else { - idx = ++(it->second); - } 
- return key + "_" + std::to_string(idx); - }; +int Graph::Add(const std::string& name, std::shared_ptr node) { auto it = nodes_.find(name); if (it != nodes_.end()) { - // Only variable can rebind the name - CHECK(!it->second.second.persistable()) << "[XPU] Node " << name - << " redefined."; - // Generate a new unique name as the key to bind the origin node if the - // origin node isn't a const node: new_name->node - nodes_.insert(std::make_pair(unique_name(name + "_var"), it->second)); - nodes_.erase(it); + // Only variable node can be shared with the same name + if (!node->is_var() || !it->second.back()->is_var()) { + LOG(FATAL) << "[XPU] Const or data node " << name << " is redefined."; + return -1; + } + } else { + auto ret = nodes_.insert( + std::make_pair(name, std::vector>())); + CHECK(ret.second); + it = ret.first; } - // Create a new node and bind with the name: name->new_node - auto node = std::make_shared(layer); - nodes_.insert(std::make_pair( - name, std::make_pair(node, Type(precision, layout, false)))); - builder_.SetLayer(unique_name(name + "_op")); - return node; + it->second.push_back(node); + return it->second.size(); } -// Const node -std::shared_ptr Graph::AddNode(const std::string& name, - const Tensor& tensor, - PrecisionType precision, - DataLayoutType layout) { - return AddNode(name, tensor, tensor.dims().Vectorize(), precision, layout); +// Variable node +std::shared_ptr Graph::Add(const std::string& name, + const xtcl::xExpr& layer, + PrecisionType precision, + DataLayoutType layout) { + auto node = std::make_shared(precision, layout, Node::Role::kVar); + auto idx = Add(name, node); + CHECK_GE(idx, 1); + node->set_data(std::make_shared(layer)); + // Generate a unique name for the current XTCL layer + builder_.SetLayer(name + "__" + std::to_string(idx)); + return node; } -std::shared_ptr Graph::AddNode(const std::string& name, - const Tensor& tensor, - std::vector shape, - PrecisionType precision, - DataLayoutType layout) { - CHECK(!HasNode(name)) << "[NPU] Node " << name << " redefined."; - auto node = std::make_shared(builder_.CreateTensor( - name, CvtShape(shape), CvtPrecisionType(precision))); - nodes_.insert(std::make_pair( - name, std::make_pair(node, Type(precision, layout, true)))); - params_.emplace( - std::make_pair(name, *CvtTensor(tensor, shape, precision, layout))); +// Const or data node +std::shared_ptr Graph::Add(const std::string& name, + const Tensor& tensor, + std::vector shape, + PrecisionType precision, + DataLayoutType layout) { + std::shared_ptr node = nullptr; + if (tensor.persistable()) { + // Const node + node = std::make_shared(precision, layout, Node::Role::kConst); + auto idx = Add(name, node); + CHECK_EQ(idx, 1); + node->set_data(std::make_shared(builder_.CreateTensor( + name, CvtShape(shape), CvtPrecisionType(precision)))); + params_.emplace( + std::make_pair(name, *CvtTensor(tensor, shape, precision, layout))); + } else { + // Data node + node = Add(name, shape, precision, layout); + } return node; } // Data node -std::shared_ptr Graph::AddNode(const std::string& name, - std::vector shape, - PrecisionType precision, - DataLayoutType layout) { - CHECK(!HasNode(name)) << "[NPU] Node " << name << " redefined."; - auto node = std::make_shared(builder_.CreateTensor( - name, CvtShape(shape), CvtPrecisionType(precision))); - nodes_.insert(std::make_pair( - name, std::make_pair(node, Type(precision, layout, false)))); +std::shared_ptr Graph::Add(const std::string& name, + std::vector shape, + PrecisionType precision, + DataLayoutType 
layout) { + auto node = std::make_shared(precision, layout, Node::Role::kData); + auto idx = Add(name, node); + CHECK_EQ(idx, 1); + node->set_data(std::make_shared(builder_.CreateTensor( + name, CvtShape(shape), CvtPrecisionType(precision)))); return node; } diff --git a/lite/kernels/xpu/bridges/graph.h b/lite/kernels/xpu/bridges/graph.h index 3107346851037a5e0ed4b8d709de836dc582b8b8..2e1f4ae8d8f57cc41f000f65f65a7f949c02a6e7 100644 --- a/lite/kernels/xpu/bridges/graph.h +++ b/lite/kernels/xpu/bridges/graph.h @@ -28,67 +28,81 @@ namespace lite { namespace subgraph { namespace xpu { -// Type of graph nodes -class Type { +// Graph and node is defined to collect all of converted XTCL IR nodes +class Node { public: - Type(PrecisionType precision = PRECISION(kFloat), - DataLayoutType layout = DATALAYOUT(kNCHW), - bool persistable = false) - : precision_(precision), layout_(layout), persistable_(persistable) {} - + enum class Role { + kVar = 0, + kConst, + kData, + }; + + Node(std::shared_ptr data, + PrecisionType precision, + DataLayoutType layout, + Role role) + : data_(data), precision_(precision), layout_(layout), role_(role) {} + Node(PrecisionType precision, DataLayoutType layout, Role role) + : precision_(precision), layout_(layout), role_(role) {} + + void set_data(std::shared_ptr data) { data_ = data; } void set_precision(PrecisionType precision) { precision_ = precision; } void set_layout(DataLayoutType layout) { layout_ = layout; } - void set_persistable(bool persistable) { persistable_ = persistable; } + void set_role(Role role) { role_ = role; } + std::shared_ptr data() { return data_; } PrecisionType precision() const { return precision_; } DataLayoutType layout() const { return layout_; } - bool persistable() const { return persistable_; } + Role role() const { return role_; } + bool is_var() const { return role_ == Role::kVar; } + bool is_const() const { return role_ == Role::kConst; } + bool is_data() const { return role_ == Role::kData; } private: + std::shared_ptr data_{nullptr}; PrecisionType precision_{PRECISION(kFloat)}; DataLayoutType layout_{DATALAYOUT(kNCHW)}; - bool persistable_{false}; + Role role_{Role::kVar}; }; -// Graph to collect all of converted XPU IR nodes class Graph { public: - // Layer node - std::shared_ptr AddNode( - const std::string& name, - const xtcl::xExpr& layer, - PrecisionType precision = PRECISION(kFloat), - DataLayoutType layout = DATALAYOUT(kNCHW)); + int Add(const std::string& name, std::shared_ptr node); + + // Variable node + std::shared_ptr Add(const std::string& name, + const xtcl::xExpr& layer, + PrecisionType precision = PRECISION(kFloat), + DataLayoutType layout = DATALAYOUT(kNCHW)); + + // Const or data node + std::shared_ptr Add(const std::string& name, + const Tensor& tensor, + std::vector shape, + PrecisionType precision = PRECISION(kFloat), + DataLayoutType layout = DATALAYOUT(kNCHW)); + + std::shared_ptr Add(const std::string& name, + const Tensor& tensor, + PrecisionType precision = PRECISION(kFloat), + DataLayoutType layout = DATALAYOUT(kNCHW)) { + return Add(name, tensor, tensor.dims().Vectorize(), precision, layout); + } - // Const node - std::shared_ptr AddNode( - const std::string& name, - const Tensor& tensor, - PrecisionType precision = PRECISION(kFloat), - DataLayoutType layout = DATALAYOUT(kNCHW)); - - std::shared_ptr AddNode( - const std::string& name, - const Tensor& tensor, - std::vector shape, - PrecisionType precision = PRECISION(kFloat), - DataLayoutType layout = DATALAYOUT(kNCHW)); - - std::shared_ptr AddNode( - 
const std::string& name, - const Tensor& tensor, - DDim dims, - PrecisionType precision = PRECISION(kFloat), - DataLayoutType layout = DATALAYOUT(kNCHW)) { - return AddNode(name, tensor, dims.Vectorize(), precision, layout); + std::shared_ptr Add(const std::string& name, + const Tensor& tensor, + DDim dims, + PrecisionType precision = PRECISION(kFloat), + DataLayoutType layout = DATALAYOUT(kNCHW)) { + return Add(name, tensor, dims.Vectorize(), precision, layout); } + // Const node template - std::shared_ptr AddNode( - const std::string& name, - const std::vector& data, - std::vector shape = {}, - DataLayoutType layout = DATALAYOUT(kNCHW)) { + std::shared_ptr Add(const std::string& name, + const std::vector& data, + std::vector shape = {}, + DataLayoutType layout = DATALAYOUT(kNCHW)) { const std::type_info& info = typeid(T); PrecisionType precision = PRECISION(kFloat); if (info == typeid(float)) { @@ -111,70 +125,61 @@ class Graph { } Tensor tensor; tensor.Resize(shape); + tensor.set_persistable(true); std::memcpy(reinterpret_cast(tensor.mutable_data()), reinterpret_cast(data.data()), data.size() * sizeof(T)); - return AddNode(name, tensor, precision, layout); + return Add(name, tensor, precision, layout); } template - std::shared_ptr AddNode( - const std::string& name, - const std::vector& data, - DDim dims, - DataLayoutType layout = DATALAYOUT(kNCHW)) { - return AddNode(name, data, dims.Vectorize(), layout); + std::shared_ptr Add(const std::string& name, + const std::vector& data, + DDim dims, + DataLayoutType layout = DATALAYOUT(kNCHW)) { + return Add(name, data, dims.Vectorize(), layout); } template - std::shared_ptr AddNode( - const std::string& name, - T value, - std::vector shape = {1}, - DataLayoutType layout = DATALAYOUT(kNCHW)) { + std::shared_ptr Add(const std::string& name, + T value, + std::vector shape = {1}, + DataLayoutType layout = DATALAYOUT(kNCHW)) { int64_t size = 1; for (auto i : shape) { size *= i; } std::vector data(size, value); - return AddNode(name, data, shape, layout); + return Add(name, data, shape, layout); } template - std::shared_ptr AddNode( - const std::string& name, - T value, - DDim dims, - DataLayoutType layout = DATALAYOUT(kNCHW)) { - return AddNode(name, value, dims.Vectorize(), layout); + std::shared_ptr Add(const std::string& name, + T value, + DDim dims, + DataLayoutType layout = DATALAYOUT(kNCHW)) { + return Add(name, value, dims.Vectorize(), layout); } // Data node - std::shared_ptr AddNode( - const std::string& name, - std::vector shape, - PrecisionType precision = PRECISION(kFloat), - DataLayoutType layout = DATALAYOUT(kNCHW)); - - std::shared_ptr AddNode( - const std::string& name, - DDim dims, - PrecisionType precision = PRECISION(kFloat), - DataLayoutType layout = DATALAYOUT(kNCHW)) { - return AddNode(name, dims.Vectorize(), precision, layout); - } - - std::shared_ptr GetNode(const std::string& name) { - CHECK(HasNode(name)) << "[XPU] Node " << name << " not found."; - return nodes_.at(name).first; + std::shared_ptr Add(const std::string& name, + std::vector shape, + PrecisionType precision = PRECISION(kFloat), + DataLayoutType layout = DATALAYOUT(kNCHW)); + + std::shared_ptr Add(const std::string& name, + DDim dims, + PrecisionType precision = PRECISION(kFloat), + DataLayoutType layout = DATALAYOUT(kNCHW)) { + return Add(name, dims.Vectorize(), precision, layout); } - const Type& GetType(const std::string& name) { - CHECK(HasNode(name)) << "[XPU] Node " << name << " not found."; - return nodes_.at(name).second; + std::shared_ptr Get(const 
std::string& name) { + CHECK(Has(name)) << "[XPU] Node " << name << " not found."; + return nodes_.at(name).back(); } - bool HasNode(const std::string& name) { + bool Has(const std::string& name) { return nodes_.find(name) != nodes_.end(); } @@ -184,9 +189,7 @@ class Graph { xtcl::network::xTensorCompiler::ParamNDArrayMap params_; private: - std::unordered_map, Type>> - nodes_; - std::unordered_map counts_; + std::unordered_map>> nodes_; }; } // namespace xpu diff --git a/lite/kernels/xpu/bridges/layer_norm_op.cc b/lite/kernels/xpu/bridges/layer_norm_op.cc index 601dd42770e565bd638ffbc5bc4d71aff39cc721..3ad190b73f59d7f1decf01c52d24799550daaea8 100644 --- a/lite/kernels/xpu/bridges/layer_norm_op.cc +++ b/lite/kernels/xpu/bridges/layer_norm_op.cc @@ -51,23 +51,23 @@ int LayerNormConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto x_inner_size = x_dims.Slice(axis, x_rank).production(); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } if (reshape) { auto reshaped_x_dims = x_dims.Slice(0, axis).Vectorize(); reshaped_x_dims.push_back(x_inner_size); - x_node = - graph->AddNode(x_name + "/reshape", - graph->builder_.CreateReshape( - *x_node, CvtShape(reshaped_x_dims))); + x_node = graph->Add( + x_name + "/reshape", + graph->builder_.CreateReshape( + *x_node->data(), CvtShape(reshaped_x_dims))); } // Scale node - std::shared_ptr scale_const_node = nullptr; + std::shared_ptr scale_node = nullptr; if (HasInputArg(op_info, scope, "Scale")) { auto scale_name = op_info->Input("Scale").front(); auto scale_type = kernel->GetInputDeclType("Scale"); @@ -77,14 +77,13 @@ int LayerNormConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto scale_dims = scale->dims(); CHECK_EQ(scale_dims.size(), 1); CHECK_EQ(scale_dims.production(), x_inner_size); - scale_const_node = graph->AddNode(scale_name, *scale); + scale_node = graph->Add(scale_name, *scale); } else { - scale_const_node = - graph->AddNode(y_name + "/scale_one", 1.0f, {x_inner_size}); + scale_node = graph->Add(y_name + "/scale_one", 1.0f, {x_inner_size}); } // Bias node - std::shared_ptr bias_const_node = nullptr; + std::shared_ptr bias_node = nullptr; if (HasInputArg(op_info, scope, "Bias")) { auto bias_name = op_info->Input("Bias").front(); auto bias_type = kernel->GetInputDeclType("Bias"); @@ -94,26 +93,25 @@ int LayerNormConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto bias_dims = bias->dims(); CHECK_EQ(bias_dims.size(), 1); CHECK_EQ(bias_dims.production(), x_inner_size); - bias_const_node = graph->AddNode(bias_name, *bias); + bias_node = graph->Add(bias_name, *bias); } else { - bias_const_node = - graph->AddNode(y_name + "/bias_zero", 0.0f, {x_inner_size}); + bias_node = graph->Add(y_name + "/bias_zero", 0.0f, {x_inner_size}); } // Layer Norm node auto layer_norm_node = - graph->AddNode(y_name, - graph->builder_.CreateLayerNorm(*x_node, - *scale_const_node, - *bias_const_node, - axis, - epsilon, - true, - true)); + graph->Add(y_name, + graph->builder_.CreateLayerNorm(*x_node->data(), + *scale_node->data(), + *bias_node->data(), + axis, + epsilon, + true, + true)); if (reshape) { - graph->AddNode(y_name, - graph->builder_.CreateReshape( - *layer_norm_node, CvtShape(y_dims))); + graph->Add(y_name, + graph->builder_.CreateReshape(*layer_norm_node->data(), + CvtShape(y_dims))); } return 
REBUILD_WHEN_SHAPE_CHANGED; } @@ -123,6 +121,6 @@ int LayerNormConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, - layer_norm, +REGISTER_SUBGRAPH_BRIDGE(layer_norm, + kXPU, paddle::lite::subgraph::xpu::LayerNormConverter); diff --git a/lite/kernels/xpu/bridges/lookup_table_op.cc b/lite/kernels/xpu/bridges/lookup_table_op.cc index a03e0c2d24deb691e1de464c62ea8ef76d76ddab..100ddadb2f361c1535a5c58392aae6e3fb3039b0 100644 --- a/lite/kernels/xpu/bridges/lookup_table_op.cc +++ b/lite/kernels/xpu/bridges/lookup_table_op.cc @@ -57,30 +57,34 @@ int LookupTableConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // Ids node - std::shared_ptr ids_node = nullptr; - if (graph->HasNode(ids_name)) { - ids_node = graph->GetNode(ids_name); + std::shared_ptr ids_node = nullptr; + if (graph->Has(ids_name)) { + ids_node = graph->Get(ids_name); } else { - ids_node = graph->AddNode( + ids_node = graph->Add( ids_name, ids_dims, ids_type->precision(), ids_type->layout()); } // Flatten Ids node if (ids_dims.size() != 1) { - ids_node = graph->AddNode(ids_name + "/reshape", - graph->builder_.CreateReshape(*ids_node, {-1}), - ids_type->precision(), - ids_type->layout()); + ids_node = + graph->Add(ids_name + "/reshape", + graph->builder_.CreateReshape(*ids_node->data(), {-1}), + ids_type->precision(), + ids_type->layout()); } - auto w_const_node = graph->AddNode(w_name, *w); + + // W node + auto w_node = graph->Add(w_name, *w); // Reshape the gather node with the inferred shape as the output node - auto gather_node = graph->AddNode( - out_name, - graph->builder_.CreateGather(*w_const_node, *ids_node, /* axis= */ 0)); + auto gather_node = + graph->Add(out_name, + graph->builder_.CreateGather( + *w_node->data(), *ids_node->data(), /* axis= */ 0)); if (out_dims.size() != 2) { - graph->AddNode(out_name, - graph->builder_.CreateReshape( - *gather_node, CvtShape(out_dims))); + graph->Add(out_name, + graph->builder_.CreateReshape( + *gather_node->data(), CvtShape(out_dims))); } return SUCCESS; } @@ -90,6 +94,6 @@ int LookupTableConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, - lookup_table, +REGISTER_SUBGRAPH_BRIDGE(lookup_table, + kXPU, paddle::lite::subgraph::xpu::LookupTableConverter); diff --git a/lite/kernels/xpu/bridges/matmul_op.cc b/lite/kernels/xpu/bridges/matmul_op.cc index 330b336840148fa54d5c9f2eae39a08fdfad9557..c17ba8423c04eddf8b042c95e959d8b703c60c7a 100644 --- a/lite/kernels/xpu/bridges/matmul_op.cc +++ b/lite/kernels/xpu/bridges/matmul_op.cc @@ -57,19 +57,19 @@ int MatmulConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto alpha = op_info->GetAttr("alpha"); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Y node - std::shared_ptr y_node = nullptr; - if (graph->HasNode(y_name)) { - y_node = graph->GetNode(y_name); + std::shared_ptr y_node = nullptr; + if (graph->Has(y_name)) { + y_node = graph->Get(y_name); } else { - y_node = graph->AddNode(y_name, y_dims); + y_node = graph->Add(y_name, *y); } // Matmul node @@ -80,52 +80,55 @@ int MatmulConverter(void* ctx, OpLite* op, KernelBase* kernel) { if (x_dims.size() != 3) { auto m = static_cast(x_dims[x_dims.size() - 2]); auto k = 
static_cast(x_dims[x_dims.size() - 1]); - x_node = - graph->AddNode(x_name + "/reshape", - graph->builder_.CreateReshape(*x_node, {-1, m, k})); + x_node = graph->Add( + x_name + "/reshape", + graph->builder_.CreateReshape(*x_node->data(), {-1, m, k})); if (transpose_x) { - x_node = - graph->AddNode(x_name + "/reshape/transpose", - graph->builder_.CreateTranspose(*x_node, {0, 2, 1})); + x_node = graph->Add( + x_name + "/reshape/transpose", + graph->builder_.CreateTranspose(*x_node->data(), {0, 2, 1})); } } // Reshape and transposed Y node if (y_dims.size() != 3) { auto k = static_cast(y_dims[y_dims.size() - 2]); auto n = static_cast(y_dims[y_dims.size() - 1]); - y_node = - graph->AddNode(y_name + "/reshape", - graph->builder_.CreateReshape(*y_node, {-1, k, n})); + y_node = graph->Add( + y_name + "/reshape", + graph->builder_.CreateReshape(*y_node->data(), {-1, k, n})); if (!transpose_y) { - y_node = - graph->AddNode(y_name + "/reshape/transpose", - graph->builder_.CreateTranspose(*y_node, {0, 2, 1})); + y_node = graph->Add( + y_name + "/reshape/transpose", + graph->builder_.CreateTranspose(*y_node->data(), {0, 2, 1})); } } // Matmul node - auto matmul_node = graph->AddNode( - out_name, graph->builder_.CreateBatchMatmul(*x_node, *y_node)); + auto matmul_node = graph->Add( + out_name, + graph->builder_.CreateBatchMatmul(*x_node->data(), *y_node->data())); if (fabs(alpha - 1) > 1e-6f) { - matmul_node = graph->AddNode( - out_name, graph->builder_.CreateScale(*matmul_node, alpha)); + matmul_node = graph->Add( + out_name, graph->builder_.CreateScale(*matmul_node->data(), alpha)); } if (out_dims.size() != 3) { - graph->AddNode(out_name, - graph->builder_.CreateReshape( - *matmul_node, CvtShape(out_dims))); + graph->Add(out_name, + graph->builder_.CreateReshape( + *matmul_node->data(), CvtShape(out_dims))); } } else if (x_dims.size() == 2 && y_dims.size() == 2) { // x: [M, K], y: [K, N], out: [M, N] if (transpose_x) { - x_node = graph->AddNode(x_name + "/transpose", - graph->builder_.CreateTranspose(*x_node, {1, 0})); + x_node = + graph->Add(x_name + "/transpose", + graph->builder_.CreateTranspose(*x_node->data(), {1, 0})); } - auto matmul_node = graph->AddNode( - out_name, - graph->builder_.CreateMatmul2D(*x_node, *y_node, transpose_y)); + auto matmul_node = + graph->Add(out_name, + graph->builder_.CreateMatmul2D( + *x_node->data(), *y_node->data(), transpose_y)); if (fabs(alpha - 1) > 1e-6f) { - matmul_node = graph->AddNode( - out_name, graph->builder_.CreateScale(*matmul_node, alpha)); + matmul_node = graph->Add( + out_name, graph->builder_.CreateScale(*matmul_node->data(), alpha)); } } else if (x_dims.size() == 1 && y_dims.size() == 1) { // x: [K], y: [K], out: [1] @@ -141,6 +144,6 @@ int MatmulConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, - matmul, +REGISTER_SUBGRAPH_BRIDGE(matmul, + kXPU, paddle::lite::subgraph::xpu::MatmulConverter); diff --git a/lite/kernels/xpu/bridges/mul_op.cc b/lite/kernels/xpu/bridges/mul_op.cc index 40780557457e3ed9b99e1cec2b5bdead7f2564dd..e12f767d13e4c1e01b671f5a4f7ba712dd8a1ef5 100644 --- a/lite/kernels/xpu/bridges/mul_op.cc +++ b/lite/kernels/xpu/bridges/mul_op.cc @@ -56,49 +56,50 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) { CHECK_EQ(x_matrix_dims[1], y_matrix_dims[0]); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = 
graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Flatten X node if (x_dims.size() != 2) { - x_node = - graph->AddNode(x_name + "/reshape", - graph->builder_.CreateReshape( - *x_node, {-1, static_cast(x_matrix_dims[1])})); + x_node = graph->Add( + x_name + "/reshape", + graph->builder_.CreateReshape( + *x_node->data(), {-1, static_cast(x_matrix_dims[1])})); } // Y node - std::shared_ptr y_node = nullptr; - if (graph->HasNode(y_name)) { - y_node = graph->GetNode(y_name); + std::shared_ptr y_node = nullptr; + if (graph->Has(y_name)) { + y_node = graph->Get(y_name); } else { - y_node = graph->AddNode(y_name, y_dims); + y_node = graph->Add(y_name, *y); } // Flatten Y node if (y_dims.size() != 2) { - y_node = - graph->AddNode(y_name + "/reshape", - graph->builder_.CreateReshape( - *y_node, {static_cast(y_matrix_dims[0]), -1})); + y_node = graph->Add( + y_name + "/reshape", + graph->builder_.CreateReshape( + *y_node->data(), {static_cast(y_matrix_dims[0]), -1})); } // Reshape the matmul node with the inferred shape as the output node - auto matmul_node = graph->AddNode( - out_name, graph->builder_.CreateMatmul2D(*x_node, *y_node, false)); + auto matmul_node = graph->Add( + out_name, + graph->builder_.CreateMatmul2D(*x_node->data(), *y_node->data(), false)); if (out_dims.size() != 2) { - graph->AddNode(out_name, - graph->builder_.CreateReshape( - *matmul_node, CvtShape(out_dims))); + graph->Add(out_name, + graph->builder_.CreateReshape( + *matmul_node->data(), CvtShape(out_dims))); } return REBUILD_WHEN_SHAPE_CHANGED; -} +} // namespace xpu } // namespace xpu } // namespace subgraph } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, mul, paddle::lite::subgraph::xpu::MulConverter); +REGISTER_SUBGRAPH_BRIDGE(mul, kXPU, paddle::lite::subgraph::xpu::MulConverter); diff --git a/lite/kernels/xpu/bridges/paddle_use_bridges.h b/lite/kernels/xpu/bridges/paddle_use_bridges.h index 588fcdd6e4fdd0115a731ec9918b632b19052cfd..bed88034ae8c00cf2de4e747234c49283cc18c68 100644 --- a/lite/kernels/xpu/bridges/paddle_use_bridges.h +++ b/lite/kernels/xpu/bridges/paddle_use_bridges.h @@ -14,25 +14,25 @@ #pragma once -USE_SUBGRAPH_BRIDGE(XPU, relu); -USE_SUBGRAPH_BRIDGE(XPU, tanh); -USE_SUBGRAPH_BRIDGE(XPU, conv2d); -USE_SUBGRAPH_BRIDGE(XPU, depthwise_conv2d); -USE_SUBGRAPH_BRIDGE(XPU, elementwise_add); -USE_SUBGRAPH_BRIDGE(XPU, pool2d); -USE_SUBGRAPH_BRIDGE(XPU, softmax); -USE_SUBGRAPH_BRIDGE(XPU, mul); -USE_SUBGRAPH_BRIDGE(XPU, batch_norm); -USE_SUBGRAPH_BRIDGE(XPU, stack); -USE_SUBGRAPH_BRIDGE(XPU, gather); -USE_SUBGRAPH_BRIDGE(XPU, scale); -USE_SUBGRAPH_BRIDGE(XPU, lookup_table); -USE_SUBGRAPH_BRIDGE(XPU, slice); -USE_SUBGRAPH_BRIDGE(XPU, transpose); -USE_SUBGRAPH_BRIDGE(XPU, transpose2); -USE_SUBGRAPH_BRIDGE(XPU, reshape); -USE_SUBGRAPH_BRIDGE(XPU, reshape2); -USE_SUBGRAPH_BRIDGE(XPU, layer_norm); -USE_SUBGRAPH_BRIDGE(XPU, gelu); -USE_SUBGRAPH_BRIDGE(XPU, dropout); -USE_SUBGRAPH_BRIDGE(XPU, matmul); +USE_SUBGRAPH_BRIDGE(relu, kXPU); +USE_SUBGRAPH_BRIDGE(tanh, kXPU); +USE_SUBGRAPH_BRIDGE(conv2d, kXPU); +USE_SUBGRAPH_BRIDGE(depthwise_conv2d, kXPU); +USE_SUBGRAPH_BRIDGE(elementwise_add, kXPU); +USE_SUBGRAPH_BRIDGE(pool2d, kXPU); +USE_SUBGRAPH_BRIDGE(softmax, kXPU); +USE_SUBGRAPH_BRIDGE(mul, kXPU); +USE_SUBGRAPH_BRIDGE(batch_norm, kXPU); +USE_SUBGRAPH_BRIDGE(stack, kXPU); +USE_SUBGRAPH_BRIDGE(gather, kXPU); +USE_SUBGRAPH_BRIDGE(scale, kXPU); +USE_SUBGRAPH_BRIDGE(lookup_table, kXPU); +USE_SUBGRAPH_BRIDGE(slice, kXPU); 
+USE_SUBGRAPH_BRIDGE(transpose, kXPU); +USE_SUBGRAPH_BRIDGE(transpose2, kXPU); +USE_SUBGRAPH_BRIDGE(reshape, kXPU); +USE_SUBGRAPH_BRIDGE(reshape2, kXPU); +USE_SUBGRAPH_BRIDGE(layer_norm, kXPU); +USE_SUBGRAPH_BRIDGE(gelu, kXPU); +USE_SUBGRAPH_BRIDGE(dropout, kXPU); +USE_SUBGRAPH_BRIDGE(matmul, kXPU); diff --git a/lite/kernels/xpu/bridges/pool_op.cc b/lite/kernels/xpu/bridges/pool_op.cc index 60787a342948251eb141daced2ba3cd2931a6da9..90653edcce26dd7da5ca0848368a98ea87a04c0d 100644 --- a/lite/kernels/xpu/bridges/pool_op.cc +++ b/lite/kernels/xpu/bridges/pool_op.cc @@ -50,21 +50,22 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto exclusive = op_info->GetAttr("exclusive"); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Pool node if (pooling_type == "max") { if (global_pooling) { - graph->AddNode(out_name, graph->builder_.CreateGlobalMaxPool2D(*x_node)); + graph->Add(out_name, + graph->builder_.CreateGlobalMaxPool2D(*x_node->data())); } else { - graph->AddNode( + graph->Add( out_name, - graph->builder_.CreateMaxPool2D(*x_node, + graph->builder_.CreateMaxPool2D(*x_node->data(), CvtShape(ksize), CvtShape(strides), CvtShape(paddings), @@ -73,12 +74,13 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) { } } else if (pooling_type == "avg") { if (global_pooling) { - graph->AddNode(out_name, graph->builder_.CreateGlobalAvgPool2D(*x_node)); + graph->Add(out_name, + graph->builder_.CreateGlobalAvgPool2D(*x_node->data())); } else { // !exclusive ---> count_include_pad - graph->AddNode( + graph->Add( out_name, - graph->builder_.CreateAvgPool2D(*x_node, + graph->builder_.CreateAvgPool2D(*x_node->data(), CvtShape(ksize), CvtShape(strides), CvtShape(paddings), @@ -98,6 +100,6 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, - pool2d, +REGISTER_SUBGRAPH_BRIDGE(pool2d, + kXPU, paddle::lite::subgraph::xpu::PoolConverter); diff --git a/lite/kernels/xpu/bridges/reshape_op.cc b/lite/kernels/xpu/bridges/reshape_op.cc index eeee6c7244d7686a6c07734ffcfedcf46c92c195..e21d005c7b387071da6316d46ff0d3f8105d73f2 100644 --- a/lite/kernels/xpu/bridges/reshape_op.cc +++ b/lite/kernels/xpu/bridges/reshape_op.cc @@ -44,11 +44,11 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { CHECK(out_type->layout() == DATALAYOUT(kNCHW)); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } std::vector shape; @@ -59,6 +59,7 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { // CHECK(shape_tensor_type->layout() == DATALAYOUT(kNCHW)); for (auto shape_tensor_name : shape_tensor_names) { auto shape_tensor = scope->FindMutableTensor(shape_tensor_name); + CHECK(shape_tensor->persistable()); auto shape_tensor_data = shape_tensor->mutable_data(); shape.emplace_back(shape_tensor_data[0]); } @@ -73,6 +74,7 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { // CHECK(actual_shape_type->precision() == PRECISION(kInt32)); // CHECK(actual_shape_type->layout() == DATALAYOUT(kNCHW)); auto 
actual_shape = scope->FindMutableTensor(actual_shape_name); + CHECK(actual_shape->persistable()); auto actual_shape_dims = actual_shape->dims(); auto actual_shape_data = actual_shape->mutable_data(); auto shape = std::vector( @@ -86,9 +88,9 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto out_dims = operators::ValidateShape(shape, x_dims); // Reshape node - graph->AddNode(out_name, - graph->builder_.CreateReshape( - *x_node, CvtShape(out_dims))); + graph->Add(out_name, + graph->builder_.CreateReshape(*x_node->data(), + CvtShape(out_dims))); return REBUILD_WHEN_SHAPE_CHANGED; } @@ -97,9 +99,9 @@ int ReshapeConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, - reshape2, +REGISTER_SUBGRAPH_BRIDGE(reshape2, + kXPU, paddle::lite::subgraph::xpu::ReshapeConverter); -REGISTER_SUBGRAPH_BRIDGE(XPU, - reshape, +REGISTER_SUBGRAPH_BRIDGE(reshape, + kXPU, paddle::lite::subgraph::xpu::ReshapeConverter); diff --git a/lite/kernels/xpu/bridges/scale_op.cc b/lite/kernels/xpu/bridges/scale_op.cc index a3423d290c271b9d7caf1cafdf59c5069def7a11..e6871390ac2690fa2e439ae56e59e49f342777e4 100644 --- a/lite/kernels/xpu/bridges/scale_op.cc +++ b/lite/kernels/xpu/bridges/scale_op.cc @@ -46,17 +46,17 @@ int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) { float bias = op_info->GetAttr("bias"); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Scale node - graph->AddNode( - out_name, - graph->builder_.CreateScale(*x_node, scale, bias, bias_after_scale)); + graph->Add(out_name, + graph->builder_.CreateScale( + *x_node->data(), scale, bias, bias_after_scale)); return SUCCESS; } @@ -65,6 +65,6 @@ int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, - scale, +REGISTER_SUBGRAPH_BRIDGE(scale, + kXPU, paddle::lite::subgraph::xpu::ScaleConverter); diff --git a/lite/kernels/xpu/bridges/slice_op.cc b/lite/kernels/xpu/bridges/slice_op.cc index 90c91d3b594b91c5875830e0ce468e5ab80ecc72..3e4592d454ae9b79a51606ed9108c0ef17878276 100644 --- a/lite/kernels/xpu/bridges/slice_op.cc +++ b/lite/kernels/xpu/bridges/slice_op.cc @@ -46,11 +46,11 @@ int SliceConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto ends = op_info->GetAttr>("ends"); // Input node - std::shared_ptr input_node = nullptr; - if (graph->HasNode(input_name)) { - input_node = graph->GetNode(input_name); + std::shared_ptr input_node = nullptr; + if (graph->Has(input_name)) { + input_node = graph->Get(input_name); } else { - input_node = graph->AddNode(input_name, input_dims); + input_node = graph->Add(input_name, *input); } // Calculate the begin and end of the slice in all of @@ -74,9 +74,9 @@ int SliceConverter(void* ctx, OpLite* op, KernelBase* kernel) { strides.push_back(1); } } - graph->AddNode( - out_name, - graph->builder_.CreateStridedSlice(*input_node, begin, end, strides)); + graph->Add(out_name, + graph->builder_.CreateStridedSlice( + *input_node->data(), begin, end, strides)); return REBUILD_WHEN_SHAPE_CHANGED; } @@ -85,6 +85,6 @@ int SliceConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, - slice, +REGISTER_SUBGRAPH_BRIDGE(slice, + kXPU, 
paddle::lite::subgraph::xpu::SliceConverter); diff --git a/lite/kernels/xpu/bridges/softmax_op.cc b/lite/kernels/xpu/bridges/softmax_op.cc index 6deb536ef17c4043e09c4b63255b585ad1abf230..d964f29a86ac00034c61706af35f8ca220921ec0 100644 --- a/lite/kernels/xpu/bridges/softmax_op.cc +++ b/lite/kernels/xpu/bridges/softmax_op.cc @@ -44,15 +44,15 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto axis = op_info->GetAttr("axis"); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Softmax node - graph->AddNode(out_name, graph->builder_.CreateSoftmax(*x_node, axis)); + graph->Add(out_name, graph->builder_.CreateSoftmax(*x_node->data(), axis)); return SUCCESS; } @@ -61,6 +61,6 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, - softmax, +REGISTER_SUBGRAPH_BRIDGE(softmax, + kXPU, paddle::lite::subgraph::xpu::SoftmaxConverter); diff --git a/lite/kernels/xpu/bridges/stack_op.cc b/lite/kernels/xpu/bridges/stack_op.cc index eb7d6d7b79c9cdc32b62254e429903c4cc8ea6f6..69673aaebaf0a112fe5b1339b6e253a3c3a0334b 100644 --- a/lite/kernels/xpu/bridges/stack_op.cc +++ b/lite/kernels/xpu/bridges/stack_op.cc @@ -46,19 +46,19 @@ int StackConverter(void* ctx, OpLite* op, KernelBase* kernel) { for (auto& x_name : x_names) { auto x = scope->FindMutableTensor(x_name); auto x_dims = x->dims(); - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } - x_nodes.push_back(*x_node); + x_nodes.push_back(*x_node->data()); } // Stack node - graph->AddNode(y_name, - graph->builder_.CreateStack( - xtcl::network::TupleNode::make(x_nodes), axis)); + graph->Add(y_name, + graph->builder_.CreateStack( + xtcl::network::TupleNode::make(x_nodes), axis)); return SUCCESS; } @@ -67,6 +67,6 @@ int StackConverter(void* ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, - stack, +REGISTER_SUBGRAPH_BRIDGE(stack, + kXPU, paddle::lite::subgraph::xpu::StackConverter); diff --git a/lite/kernels/xpu/bridges/transpose_op.cc b/lite/kernels/xpu/bridges/transpose_op.cc index b6823dd6a83b279150603a45401c5ddee3cb9c2c..4217fe0119be8584f0ca83408dca92100e652076 100644 --- a/lite/kernels/xpu/bridges/transpose_op.cc +++ b/lite/kernels/xpu/bridges/transpose_op.cc @@ -44,19 +44,19 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) { auto axis = op_info->GetAttr>("axis"); // X node - std::shared_ptr x_node = nullptr; - if (graph->HasNode(x_name)) { - x_node = graph->GetNode(x_name); + std::shared_ptr x_node = nullptr; + if (graph->Has(x_name)) { + x_node = graph->Get(x_name); } else { - x_node = graph->AddNode(x_name, x_dims); + x_node = graph->Add(x_name, *x); } // Transpose node - graph->AddNode(out_name, - graph->builder_.CreateTranspose( - *x_node, - CvtShape( - std::vector(axis.begin(), axis.end())))); + graph->Add(out_name, + graph->builder_.CreateTranspose( + *x_node->data(), + CvtShape( + std::vector(axis.begin(), axis.end())))); return SUCCESS; } @@ -66,9 +66,9 @@ int TransposeConverter(void* 
ctx, OpLite* op, KernelBase* kernel) { } // namespace lite } // namespace paddle -REGISTER_SUBGRAPH_BRIDGE(XPU, - transpose, +REGISTER_SUBGRAPH_BRIDGE(transpose, + kXPU, paddle::lite::subgraph::xpu::TransposeConverter); -REGISTER_SUBGRAPH_BRIDGE(XPU, - transpose2, +REGISTER_SUBGRAPH_BRIDGE(transpose2, + kXPU, paddle::lite::subgraph::xpu::TransposeConverter); diff --git a/lite/kernels/xpu/subgraph_compute.cc b/lite/kernels/xpu/subgraph_compute.cc index 07a74b045477bcdff0d60913f20e79ff8497705b..298f6f0e507261e42dcdc1f1b06b47e09b81b21c 100644 --- a/lite/kernels/xpu/subgraph_compute.cc +++ b/lite/kernels/xpu/subgraph_compute.cc @@ -39,13 +39,13 @@ int SubgraphEngine::BuildDeviceProgram() { op->CheckShape(); op->InferShape(); std::string op_type = op->op_info()->Type(); - if (!bridges.Exists("XPU", op_type)) { + if (!bridges.Exists(op_type, "kXPU")) { return subgraph::FAILED; } auto kernel = inst.kernel(); - status |= bridges.Select("XPU", op_type)(reinterpret_cast(&graph), - const_cast(op), - const_cast(kernel)); + status |= bridges.Select(op_type, "kXPU")(reinterpret_cast(&graph), + const_cast(op), + const_cast(kernel)); if (subgraph::CHECK_FAILED(status)) { return subgraph::FAILED; } @@ -57,26 +57,26 @@ int SubgraphEngine::BuildDeviceProgram() { std::vector device_inodes; std::vector device_onodes; for (auto& input_name : input_names_) { - if (graph.HasNode(input_name)) { - if (!graph.GetType(input_name).persistable()) { - device_inodes.push_back(graph.GetNode(input_name).get()); + if (graph.Has(input_name)) { + if (graph.Get(input_name)->is_data()) { + device_inodes.push_back(graph.Get(input_name)->data().get()); device_inames_.push_back(input_name); } else { LOG(WARNING) << "[XPU] Input node " << input_name - << " is skipped because it is a persistable node."; + << " is ignored because it is not a data node."; } } else { LOG(WARNING) << "[XPU] Input node " << input_name - << " is skipped because it does not exist."; + << " is ignored because it does not exist."; } } for (auto& output_name : output_names_) { - if (graph.HasNode(output_name)) { - device_onodes.push_back(graph.GetNode(output_name).get()); + if (graph.Has(output_name)) { + device_onodes.push_back(graph.Get(output_name)->data().get()); device_onames_.push_back(output_name); } else { LOG(WARNING) << "[XPU] Output node " << output_name - << " is skipped because it does not exist."; + << " is ignored because it does not exist."; } } CHECK(!device_inames_.empty()) @@ -98,14 +98,14 @@ int SubgraphEngine::BuildDeviceProgram() { origin_otensors_.resize(device_onames_.size()); device_otensors_.resize(device_onames_.size()); for (int i = 0; i < device_inames_.size(); i++) { - auto type = graph.GetType(device_inames_[i]); - auto precision = type.precision(); - auto layout = type.layout(); + auto node = graph.Get(device_inames_[i]); + auto precision = node->precision(); + auto layout = node->layout(); origin_itensors_[i] = scope_->FindMutableTensor(device_inames_[i]); CHECK(origin_itensors_[i]); origin_idims_[i] = origin_itensors_[i]->dims(); - VLOG(3) << "[XPU] Inputs[" << i - << "] precision: " << PrecisionToStr(precision) + VLOG(3) << "[XPU] Inputs[" << i << "] name: " << device_inames_[i] + << " precision: " << PrecisionToStr(precision) << " layout: " << DataLayoutToStr(layout) << " dims: " << origin_idims_[i]; // Prepare the device input tensors which share data with the origin input @@ -122,14 +122,14 @@ int SubgraphEngine::BuildDeviceProgram() { device_itensors_[i].byte_offset = 0; } for (int i = 0; i < device_onames_.size(); i++) 
{ - auto type = graph.GetType(device_onames_[i]); - auto precision = type.precision(); - auto layout = type.layout(); + auto node = graph.Get(device_onames_[i]); + auto precision = node->precision(); + auto layout = node->layout(); origin_otensors_[i] = scope_->FindMutableTensor(device_onames_[i]); CHECK(origin_otensors_[i]); origin_odims_[i] = origin_otensors_[i]->dims(); - VLOG(3) << "[XPU] Outputs[" << i - << "] precision: " << PrecisionToStr(precision) + VLOG(3) << "[XPU] Outputs[" << i << "] name: " << device_onames_[i] + << " precision: " << PrecisionToStr(precision) << " layout: " << DataLayoutToStr(layout) << " dims: " << origin_odims_[i]; // Prepare the device output tensors which share data with the origin output diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt index d2794f6c847727e5d539cdebde3bd769189b73bc..9b6eae2e4bcc6ccbaf14bb0192603dddefc9c36d 100644 --- a/lite/tests/kernels/CMakeLists.txt +++ b/lite/tests/kernels/CMakeLists.txt @@ -29,7 +29,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_mul_compute SRCS mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) if(LITE_BUILD_EXTRA) diff --git a/lite/tests/kernels/scale_compute_test.cc b/lite/tests/kernels/scale_compute_test.cc index 706936d2b1e113a2acde888e91c6e0a2f2d6582f..1ededcd52d3fb4c8881a391dce5e7f22e87cdb44 100644 --- a/lite/tests/kernels/scale_compute_test.cc +++ b/lite/tests/kernels/scale_compute_test.cc @@ -16,6 +16,7 @@ #include "lite/api/paddle_use_kernels.h" #include "lite/api/paddle_use_ops.h" #include "lite/core/arena/framework.h" +#include "lite/tests/utils/fill_data.h" namespace paddle { namespace lite { @@ -23,31 +24,33 @@ namespace lite { class ScaleComputeTester : public arena::TestCase { protected: // common attributes for this op. 
- std::string input_ = "x"; - std::string output_ = "out"; + std::string x_ = "x"; + std::string out_ = "out"; + DDim x_dims_{{100, 20}}; float scale_ = 0.; float bias_ = 0.; - DDim dims_{{100, 20}}; bool bias_after_scale_; public: ScaleComputeTester(const Place& place, const std::string& alias, + const DDim& x_dims, float scale, float bias, bool bias_after_scale) : TestCase(place, alias), + x_dims_(x_dims), scale_(scale), bias_(bias), bias_after_scale_(bias_after_scale) {} void RunBaseline(Scope* scope) override { - auto* out = scope->NewTensor(output_); + auto* out = scope->NewTensor(out_); CHECK(out); - out->Resize(dims_); + out->Resize(x_dims_); auto* out_data = out->mutable_data(); - auto* x = scope->FindTensor(input_); + auto* x = scope->FindTensor(x_); const auto* x_data = x->data(); float bias = bias_; @@ -56,35 +59,34 @@ class ScaleComputeTester : public arena::TestCase { bias *= scale_; } - for (int i = 0; i < dims_.production(); i++) { + for (int i = 0; i < x_dims_.production(); i++) { out_data[i] = x_data[i] * scale_ + bias; } } void PrepareOpDesc(cpp::OpDesc* op_desc) { op_desc->SetType("scale"); - op_desc->SetInput("X", {input_}); - op_desc->SetOutput("Out", {output_}); + op_desc->SetInput("X", {x_}); + op_desc->SetOutput("Out", {out_}); op_desc->SetAttr("scale", scale_); op_desc->SetAttr("bias", bias_); op_desc->SetAttr("bias_after_scale", bias_after_scale_); } void PrepareData() override { - std::vector data(dims_.production()); - - for (int i = 0; i < dims_.production(); i++) { - data[i] = i * 1.1; - } - - SetCommonTensor(input_, dims_, data.data()); + std::vector x(x_dims_.production()); + fill_data_rand(x.data(), -1.f, 1.f, x_dims_.production()); + SetCommonTensor(x_, x_dims_, x.data()); } }; TEST(Scale, precision) { Place place; float abs_error = 2e-5; -#if defined(LITE_WITH_ARM) +#if defined(LITE_WITH_NPU) + place = TARGET(kNPU); + abs_error = 4e-3; // Using fp16 in NPU +#elif defined(LITE_WITH_ARM) place = TARGET(kARM); #elif defined(LITE_WITH_XPU) place = TARGET(kXPU); @@ -95,13 +97,16 @@ TEST(Scale, precision) { return; #endif - for (float scale : {0.123, 2., -1.2}) { - for (float bias : {1., 0., -1.2331}) { - for (bool bias_before : {true, false}) { - std::unique_ptr tester( - new ScaleComputeTester(place, "def", scale, bias, bias_before)); - arena::Arena arena(std::move(tester), place, abs_error); - arena.TestPrecision(); + for (auto x_dims : + std::vector>{{5, 2, 3, 4}, {8, 3, 5}, {12, 3}}) { + for (float scale : {0.123, 2., -1.2}) { + for (float bias : {1., 0., -1.2331}) { + for (bool bias_after_scale : {true, false}) { + std::unique_ptr tester(new ScaleComputeTester( + place, "def", DDim(x_dims), scale, bias, bias_after_scale)); + arena::Arena arena(std::move(tester), place, abs_error); + arena.TestPrecision(); + } } } } @@ -117,8 +122,8 @@ TEST(Scale, performance) { return; #endif - std::unique_ptr tester( - new ScaleComputeTester(place, "def", 1.2, 1.1, true)); + std::unique_ptr tester(new ScaleComputeTester( + place, "def", DDim(std::vector{5, 2, 3, 4}), 1.2, 1.1, true)); // To modify the arm context, one can retrive the context as follows. 
// #ifdef LITE_WITH_ARM diff --git a/lite/tests/kernels/softmax_compute_test.cc b/lite/tests/kernels/softmax_compute_test.cc index 94100da2b13aecea8ac50ea7a4ae52350455051f..a91f6534ffa1f8022e2005cc83255d306adf77c1 100644 --- a/lite/tests/kernels/softmax_compute_test.cc +++ b/lite/tests/kernels/softmax_compute_test.cc @@ -25,33 +25,33 @@ class SoftmaxComputeTest : public arena::TestCase { protected: // common attributes for this op. std::string op_type_ = "softmax"; - std::string input_ = "x"; - std::string output_ = "out"; - DDim dims_{{1, 2, 3, 4}}; + DDim x_dims_{{1, 2, 3, 4}}; + std::string x_ = "x"; + std::string out_ = "out"; int axis_ = 1; public: SoftmaxComputeTest(const Place& place, const std::string& alias, - DDim dims, + DDim x_dims, int axis) - : TestCase(place, alias), dims_(dims), axis_(axis) {} + : TestCase(place, alias), x_dims_(x_dims), axis_(axis) {} void RunBaseline(Scope* scope) override { - auto x = scope->FindTensor(input_); - auto out = scope->NewTensor(output_); + auto x = scope->FindTensor(x_); + auto out = scope->NewTensor(out_); CHECK(out); - out->Resize(dims_); + out->Resize(x_dims_); auto x_data = x->data(); auto out_data = out->mutable_data(); - auto x_rank = dims_.size(); + auto x_rank = x_dims_.size(); if (axis_ < 0) { axis_ += x_rank; } - int axis_size = dims_[axis_]; - int outer_num = dims_.Slice(0, axis_).production(); - int inner_num = dims_.Slice(axis_ + 1, x_rank).production(); + int axis_size = x_dims_[axis_]; + int outer_num = x_dims_.Slice(0, axis_).production(); + int inner_num = x_dims_.Slice(axis_ + 1, x_rank).production(); int compute_size = outer_num * inner_num; for (int i = 0; i < compute_size; i++) { int idx_inner = i % inner_num; @@ -84,15 +84,15 @@ class SoftmaxComputeTest : public arena::TestCase { void PrepareOpDesc(cpp::OpDesc* op_desc) { op_desc->SetType(op_type_); - op_desc->SetInput("X", {input_}); - op_desc->SetOutput("Out", {output_}); + op_desc->SetInput("X", {x_}); + op_desc->SetOutput("Out", {out_}); op_desc->SetAttr("axis", axis_); } void PrepareData() override { - std::vector din(dims_.production()); - fill_data_rand(din.data(), -1.f, 1.f, dims_.production()); - SetCommonTensor(input_, dims_, din.data()); + std::vector x(x_dims_.production()); + fill_data_rand(x.data(), -1.f, 1.f, x_dims_.production()); + SetCommonTensor(x_, x_dims_, x.data()); } }; @@ -100,18 +100,21 @@ TEST(Softmax, precision) { LOG(INFO) << "test softmax op"; float abs_error = 2e-5; Place place; -#if defined(LITE_WITH_XPU) +#if defined(LITE_WITH_NPU) + place = TARGET(kNPU); + abs_error = 4e-3; // Using fp16 in NPU +#elif defined(LITE_WITH_XPU) place = TARGET(kXPU); #else return; #endif - std::vector> dims{{1, 2, 3, 4}, {2, 3, 4}, {3, 4}}; - for (auto dim_in : dims) { + for (auto x_dims : + std::vector>{{1, 2, 3, 4}, {2, 3, 4}, {3, 4}}) { for (auto axis : {-1, 0, 1, 2, 3}) { - if (axis >= dim_in.size()) continue; + if (axis >= x_dims.size()) continue; std::unique_ptr tester( - new SoftmaxComputeTest(place, "def", DDim(dim_in), axis)); + new SoftmaxComputeTest(place, "def", DDim(x_dims), axis)); arena::Arena arena(std::move(tester), place, abs_error); arena.TestPrecision(); }
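
The converter hunks above all follow the same bookkeeping idiom around the renamed Graph API: check Has(name), reuse the node via Get(name) if it already exists, otherwise create it with Add(name, ...), and unwrap the underlying IR object through node->data() before handing it to the builder. The sketch below is a minimal, standalone mock of that idiom, not the real paddle::lite::subgraph::xpu::Graph (whose nodes wrap XTCL expressions and also carry precision/layout information); Node here just stores a string payload, and the assumption that Add appends to a per-name history while Get returns the most recent entry follows the nodes_.at(name).back() lookup visible in the graph header hunk.

// Minimal standalone mock of the name -> node bookkeeping used by the
// Has/Get/Add Graph API. The real class lives in the XPU bridge graph
// header and wraps XTCL expressions; here Node only carries a string
// payload so the sketch compiles on its own.
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

class Node {
 public:
  explicit Node(std::string payload) : payload_(std::move(payload)) {}
  const std::string& data() const { return payload_; }

 private:
  std::string payload_;
};

class Graph {
 public:
  bool Has(const std::string& name) const {
    return nodes_.find(name) != nodes_.end();
  }

  // Returns the most recently added node registered under `name`,
  // mirroring the nodes_.at(name).back() lookup in the patched header.
  std::shared_ptr<Node> Get(const std::string& name) const {
    return nodes_.at(name).back();
  }

  // Registers a new node under `name`; adding the same name again keeps
  // the history, so converters can chain reshape/transpose nodes onto
  // an op's output name.
  std::shared_ptr<Node> Add(const std::string& name, std::string payload) {
    auto node = std::make_shared<Node>(std::move(payload));
    nodes_[name].push_back(node);
    return node;
  }

 private:
  std::unordered_map<std::string, std::vector<std::shared_ptr<Node>>> nodes_;
};

// Typical converter idiom from the hunks above: reuse the input node if the
// graph already has it, otherwise add it, then add the op's output node.
void ConvertSomeOp(Graph* graph, const std::string& x_name,
                   const std::string& out_name) {
  std::shared_ptr<Node> x_node = graph->Has(x_name)
                                     ? graph->Get(x_name)
                                     : graph->Add(x_name, "var:" + x_name);
  graph->Add(out_name, "op(" + x_node->data() + ")");
}

int main() {
  Graph graph;
  ConvertSomeOp(&graph, "x", "out");
  ConvertSomeOp(&graph, "out", "out2");  // reuses "out" instead of re-adding it
  std::cout << graph.Get("out2")->data() << std::endl;  // prints op(op(var:x))
  return 0;
}

In the actual bridge files each such converter is then registered with REGISTER_SUBGRAPH_BRIDGE(op_type, kXPU, Converter) and pulled in via USE_SUBGRAPH_BRIDGE(op_type, kXPU), with the op type listed before the target.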