From 7cd585aa2fc2a11d3f928f6b736b1d65f93f1805 Mon Sep 17 00:00:00 2001
From: --get
Date: Wed, 27 May 2020 07:05:35 +0000
Subject: [PATCH] (feat): add norm mlu kernel and test

---
 lite/kernels/mlu/bridges/CMakeLists.txt       |   3 +
 lite/kernels/mlu/bridges/norm_op.cc           | 111 +++++++++++++
 lite/kernels/mlu/bridges/norm_op_test.cc      | 148 ++++++++++++++++++
 lite/kernels/mlu/bridges/paddle_use_bridges.h |   1 +
 4 files changed, 263 insertions(+)
 create mode 100644 lite/kernels/mlu/bridges/norm_op.cc
 create mode 100644 lite/kernels/mlu/bridges/norm_op_test.cc

diff --git a/lite/kernels/mlu/bridges/CMakeLists.txt b/lite/kernels/mlu/bridges/CMakeLists.txt
index 03b8386d88..8e90e1080f 100644
--- a/lite/kernels/mlu/bridges/CMakeLists.txt
+++ b/lite/kernels/mlu/bridges/CMakeLists.txt
@@ -60,6 +60,8 @@ if (LITE_BUILD_EXTRA)
   list(APPEND mlu_subgraph_bridges subgraph_bridge_lrn_op_mlu)
   lite_cc_library(subgraph_bridge_gather_op_mlu SRCS gather_op.cc DEPS ${subgraph_bridge_deps_mlu})
   list(APPEND mlu_subgraph_bridges subgraph_bridge_gather_op_mlu)
+  lite_cc_library(subgraph_bridge_norm_op_mlu SRCS norm_op.cc DEPS ${subgraph_bridge_deps_mlu})
+  list(APPEND mlu_subgraph_bridges subgraph_bridge_norm_op_mlu)
 endif()
 
 lite_cc_library(subgraph_test_helper_mlu SRCS test_helper.cc DEPS ${mlu_subgraph_bridges})
@@ -84,6 +86,7 @@ lite_cc_test(test_squeeze_converter_mlu SRCS squeeze_op_test.cc DEPS scope optim
 lite_cc_test(test_reshape_converter_mlu SRCS reshape_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
 lite_cc_test(test_flatten_converter_mlu SRCS flatten_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
 if (LITE_BUILD_EXTRA)
+  lite_cc_test(test_norm_converter_mlu SRCS norm_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
   lite_cc_test(test_lrn_converter_mlu SRCS lrn_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
   lite_cc_test(test_gather_converter_mlu SRCS gather_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
 endif()
diff --git a/lite/kernels/mlu/bridges/norm_op.cc b/lite/kernels/mlu/bridges/norm_op.cc
new file mode 100644
index 0000000000..492c3932a8
--- /dev/null
+++ b/lite/kernels/mlu/bridges/norm_op.cc
@@ -0,0 +1,111 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/mlu/bridges/graph.h"
+#include "lite/kernels/mlu/bridges/utility.h"
+#include "lite/kernels/npu/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace mlu {
+
+int NormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
+  CHECK(ctx != nullptr);
+  CHECK(op != nullptr);
+  auto graph = static_cast<Graph*>(ctx);
+  auto op_info = op->op_info();
+  auto op_type = op_info->Type();
+  auto scope = op->scope();
+  VLOG(3) << "[MLU] Converting " + op_type + "...";
+
+  // Get input vars and op attributes
+  auto x_var_name = op_info->Input("X").front();
+  auto x = scope->FindVar(x_var_name)->GetMutable<Tensor>();
+  auto x_dims = x->dims().Vectorize();
+
+  auto out_var_name = op_info->Output("Out").front();
+  auto output = scope->FindVar(out_var_name)->GetMutable<Tensor>();
+  auto output_dims = output->dims().Vectorize();
+  int axis = op_info->GetAttr<int>("axis");
+  float epsilon = op_info->GetAttr<float>("epsilon");
+  if (axis < 0) {
+    axis = axis + x_dims.size();
+  }
+  std::vector<int> nchw2nhwc = {0, 3, 1, 2};  // NCHW axis -> NHWC position
+  int nhwc_axis = nchw2nhwc[axis];
+
+  CHECK(graph->HasNode(x_var_name));
+  auto input_tensor = graph->GetNode(x_var_name);
+  auto output_tensor = graph->AddNode(
+      out_var_name, output_dims, CNML_TENSOR, CNML_NCHW, graph->FPType());
+
+  // ======== DEBUG ===============
+  VLOG(6) << "x name=" << x_var_name;
+  VLOG(6) << "out name=" << out_var_name;
+  VLOG(6) << "x dims=" << x->dims();
+  VLOG(6) << "out dims=" << output->dims();
+  VLOG(6) << "axis =" << axis;
+  VLOG(6) << "nhwc axis=" << nhwc_axis;
+  VLOG(6) << "epsilon =" << epsilon;
+  // cnmlPrintTensor(input_tensor->mlu_tensor(), CNML_TENSOR);
+  // cnmlPrintTensor(output_tensor->mlu_tensor(), CNML_TENSOR);
+  // ======== DEBUG END ============
+  cnmlBaseOp_t norm_op{nullptr};
+
+  cnmlNormalizeOpParam_t param;
+  int mode = -1;
+  switch (axis) {
+    case 0:
+      mode = 3;  // N
+      break;
+    case 1:
+      mode = 0;  // C
+      break;
+    case 2:
+      mode = 4;  // H
+      break;
+    case 3:
+      mode = 5;  // W
+      break;
+    default:
+      CHECK(0);
+      break;
+  }
+  cnmlCreateNormalizeOpParamV2(&param,
+                               0,  // p
+                               0,  // use_scale
+                               mode,
+                               1,  // weight
+                               epsilon);
+
+  CNML_CALL(cnmlCreateNormalizeOp(&norm_op,
+                                  param,
+                                  input_tensor->mlu_tensor(),
+                                  output_tensor->mlu_tensor(),
+                                  nullptr,
+                                  false /*is_fix8_mode*/));
+  graph->FuseOp(norm_op);
+  CNML_CALL(cnmlDestroyBaseOp(&norm_op));
+  return SUCCESS;
+}
+
+}  // namespace mlu
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+
+REGISTER_SUBGRAPH_BRIDGE(norm,
+                         kMLU,
+                         paddle::lite::subgraph::mlu::NormConverter);
diff --git a/lite/kernels/mlu/bridges/norm_op_test.cc b/lite/kernels/mlu/bridges/norm_op_test.cc
new file mode 100644
index 0000000000..35b5eabbb9
--- /dev/null
+++ b/lite/kernels/mlu/bridges/norm_op_test.cc
@@ -0,0 +1,148 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/operators/norm_op.h"
+
+#include <gtest/gtest.h>
+
+#include <cmath>
+#include <memory>
+
+#include "lite/core/op_registry.h"
+#include "lite/kernels/mlu/bridges/test_helper.h"
+#include "lite/kernels/mlu/bridges/utility.h"
+#include "lite/kernels/npu/bridges/registry.h"
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace mlu {
+
+// void ToFile(std::string file_name, Tensor* tensor) {
+//   int count = tensor->dims().production();
+//   auto data = tensor->mutable_data<float>();
+//   std::ostringstream outs;
+//   for (size_t i = 0; i < count; i++) {
+//     outs << data[i] << std::endl;
+//   }
+//   std::ofstream of;
+//   of.open(file_name, std::ios::out);
+//   of << outs.str();
+//   of.close();
+// }
+
+void norm_ref(const std::shared_ptr<operators::NormOp> op) {
+  Scope* scope = op->scope();
+  const OpInfo* op_info = op->op_info();
+  auto x = scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
+  auto out =
+      scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
+  int axis = op_info->GetAttr<int>("axis");
+  float epsilon = op_info->GetAttr<float>("epsilon");
+  auto x_dims = x->dims();
+  if (axis < 0) {
+    axis += x_dims.size();
+  }
+  out->Resize(x_dims.Vectorize());
+  auto* out_data = out->mutable_data<float>();
+
+  const auto* x_data = x->data<float>();
+  int pre_n = x_dims.count(0, axis);
+  int n = x_dims[axis];
+  int post_n = x_dims.count(axis + 1, x_dims.size());
+  for (int i = 0; i < pre_n; i++) {
+    for (int k = 0; k < post_n; k++) {
+      float sum = epsilon;
+      const float* in_tmp = x_data + i * n * post_n + k;
+      for (int j = 0; j < n; j++) {
+        sum += in_tmp[j * post_n] * in_tmp[j * post_n];
+      }
+      sum = std::sqrt(sum);
+      float* out_tmp = out_data + i * n * post_n + k;
+      for (int j = 0; j < n; j++) {
+        out_tmp[j * post_n] = in_tmp[j * post_n] / sum;
+      }
+    }
+  }
+}
+
+void test_norm(const std::vector<int64_t>& input_shape, int axis) {
+  // prepare input&output variables
+  Scope scope;
+  std::string x_var_name = "x";
+  std::string out_var_name = "out";
+  std::string out_ref_var_name = "out_ref";
+  auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
+  auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
+  auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
+  x->Resize(input_shape);
+  // initialize input&output data
+  FillTensor<float>(x, -9, 9);
+  // initialize op desc
+  cpp::OpDesc opdesc;
+  float epsilon = 1e-9f;
+  opdesc.SetType("norm");
+  opdesc.SetInput("X", {x_var_name});
+  opdesc.SetOutput("Out", {out_var_name});
+  opdesc.SetAttr("axis", static_cast<int>(axis));
+  opdesc.SetAttr("epsilon", static_cast<float>(epsilon));
+
+  // create and convert op to MLU model, then run it on MLU
+  auto op = CreateOp<operators::NormOp>(opdesc, &scope);
+  norm_ref(op);
+  out_ref->CopyDataFrom(*out);
+  Tensor input_x;
+  input_x.Resize(DDim(input_shape));
+  // change input layout from NCHW to NHWC
+  transpose(x->mutable_data<float>(),
+            input_x.mutable_data<float>(),
+            {static_cast<int>(input_shape[0]),
+             static_cast<int>(input_shape[1]),
+             static_cast<int>(input_shape[2]),
+             static_cast<int>(input_shape[3])},
+            {0, 2, 3, 1});
+  x->CopyDataFrom(input_x);
+
+  LaunchOp(op, {x_var_name}, {out_var_name});
+  auto* out_data = out->mutable_data<float>();
+  auto* out_ref_data = out_ref->mutable_data<float>();
+  std::vector<int64_t> out_shape = input_shape;
+  Tensor output_trans;
+  output_trans.Resize(out_shape);
+  // Change output layout from NHWC to NCHW
+  transpose(out_data,
+            output_trans.mutable_data<float>(),
+            {static_cast<int>(out_shape[0]),
+             static_cast<int>(out_shape[2]),
+             static_cast<int>(out_shape[3]),
+             static_cast<int>(out_shape[1])},
+            {0, 3, 1, 2});
+  out_data = output_trans.mutable_data<float>();
+
+  for (int i = 0; i < out->dims().production(); i++) {
+    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-2);
+  }
+}
+
+TEST(MLUBridges, norm) {
+  test_norm({1, 2, 3, 4}, 1);
+  test_norm({1, 2, 3, 4}, 2);
+  test_norm({1, 2, 3, 4}, 3);
+}
+
+}  // namespace mlu
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+
+USE_SUBGRAPH_BRIDGE(norm, kMLU);
diff --git a/lite/kernels/mlu/bridges/paddle_use_bridges.h b/lite/kernels/mlu/bridges/paddle_use_bridges.h
index 703687df87..9bd2f3357f 100644
--- a/lite/kernels/mlu/bridges/paddle_use_bridges.h
+++ b/lite/kernels/mlu/bridges/paddle_use_bridges.h
@@ -42,4 +42,5 @@ USE_SUBGRAPH_BRIDGE(squeeze2, kMLU);
 #ifdef LITE_BUILD_EXTRA
 USE_SUBGRAPH_BRIDGE(gather, kMLU);
 USE_SUBGRAPH_BRIDGE(lrn, kMLU)
+USE_SUBGRAPH_BRIDGE(norm, kMLU)
 #endif
-- 
GitLab