From 328d2da44546efd30fb47270d57e11a3ce2351bb Mon Sep 17 00:00:00 2001 From: zhaoying9105 Date: Tue, 16 Jun 2020 19:28:47 +0800 Subject: [PATCH] three ops: density prior box & box coder & multiclass nms (#116) * (feat): add cnplugin cmake * (feat): add cnplugin deps to backends/CMakeLists.txt and utility.h * (feat): add box_coder converter * (feat): add density_prior_box op * (feat): add multiclass nms converter, with workspace as a static tensor * (ref): change nms test input from file to random, add iou * (ref): add density_prior_box & box_coder to paddle_use_bridges.h --- cmake/mlu.cmake | 18 + lite/backends/mlu/CMakeLists.txt | 2 +- lite/kernels/mlu/bridges/CMakeLists.txt | 9 + lite/kernels/mlu/bridges/box_coder_op.cc | 166 +++++ lite/kernels/mlu/bridges/box_coder_op_test.cc | 505 +++++++++++++++ .../mlu/bridges/density_prior_box_op.cc | 231 +++++++ .../mlu/bridges/density_prior_box_op_test.cc | 302 +++++++++ lite/kernels/mlu/bridges/multiclass_nms.cc | 250 ++++++++ .../kernels/mlu/bridges/multiclass_nms_api.cc | 132 ++++ lite/kernels/mlu/bridges/multiclass_nms_api.h | 82 +++ .../kernels/mlu/bridges/multiclass_nms_impl.o | Bin 0 -> 400888 bytes .../mlu/bridges/multiclass_nms_op_test.cc | 604 ++++++++++++++++++ lite/kernels/mlu/bridges/paddle_use_bridges.h | 3 + lite/kernels/mlu/bridges/utility.h | 1 + 14 files changed, 2304 insertions(+), 1 deletion(-) create mode 100644 lite/kernels/mlu/bridges/box_coder_op.cc create mode 100644 lite/kernels/mlu/bridges/box_coder_op_test.cc create mode 100644 lite/kernels/mlu/bridges/density_prior_box_op.cc create mode 100644 lite/kernels/mlu/bridges/density_prior_box_op_test.cc create mode 100644 lite/kernels/mlu/bridges/multiclass_nms.cc create mode 100644 lite/kernels/mlu/bridges/multiclass_nms_api.cc create mode 100644 lite/kernels/mlu/bridges/multiclass_nms_api.h create mode 100644 lite/kernels/mlu/bridges/multiclass_nms_impl.o create mode 100644 lite/kernels/mlu/bridges/multiclass_nms_op_test.cc diff --git 
a/cmake/mlu.cmake b/cmake/mlu.cmake index b73ab16462..580fbc95bd 100644 --- a/cmake/mlu.cmake +++ b/cmake/mlu.cmake @@ -36,6 +36,12 @@ if(NOT CNRT_INC) message(FATAL_ERROR "Can not find cnrt.h in ${NEUWARE_HOME}/include") endif() +find_path(CNPLUGIN_INC NAMES cnplugin.h + PATHS ${NEUWARE_HOME}/include NO_DEFAULT_PATH) +if(NOT CNPLUGIN_INC) + message(FATAL_ERROR "Can not find cnplugin.h in ${NEUWARE_HOME}/include") +endif() + include_directories("${NEUWARE_HOME}/include") find_library(CNML_LIB_FILE NAMES cnml @@ -59,3 +65,15 @@ else() add_library(cnrt_lib SHARED IMPORTED GLOBAL) set_property(TARGET cnrt_lib PROPERTY IMPORTED_LOCATION ${CNRT_LIB_FILE}) endif() + + +find_library(CNPLUGIN_LIB_FILE NAMES cnplugin + PATHS ${NEUWARE_HOME}/lib64) + +if(NOT CNPLUGIN_LIB_FILE) + message(FATAL_ERROR "Can not find CNPLUGIN Library in ${NEUWARE_HOME}/lib64") +else() + message(STATUS "Found CNPLUGIN Library: ${CNPLUGIN_LIB_FILE}") + add_library(cnplugin_lib SHARED IMPORTED GLOBAL) + set_property(TARGET cnplugin_lib PROPERTY IMPORTED_LOCATION ${CNPLUGIN_LIB_FILE}) +endif() \ No newline at end of file diff --git a/lite/backends/mlu/CMakeLists.txt b/lite/backends/mlu/CMakeLists.txt index 29c90b4220..e4d997348e 100644 --- a/lite/backends/mlu/CMakeLists.txt +++ b/lite/backends/mlu/CMakeLists.txt @@ -4,4 +4,4 @@ endif() message (STATUS "Lite with mlu backend") -lite_cc_library(target_wrapper_mlu SRCS target_wrapper.cc DEPS cnml_lib cnrt_lib) +lite_cc_library(target_wrapper_mlu SRCS target_wrapper.cc DEPS cnml_lib cnrt_lib cnplugin_lib) diff --git a/lite/kernels/mlu/bridges/CMakeLists.txt b/lite/kernels/mlu/bridges/CMakeLists.txt index a580426b2d..0d91b3f35d 100644 --- a/lite/kernels/mlu/bridges/CMakeLists.txt +++ b/lite/kernels/mlu/bridges/CMakeLists.txt @@ -28,6 +28,9 @@ lite_cc_library(subgraph_bridge_argmax_op_mlu SRCS argmax_op.cc DEPS ${subgraph_ lite_cc_library(subgraph_bridge_squeeze_op_mlu SRCS squeeze_op.cc DEPS ${subgraph_bridge_deps_mlu}) 
lite_cc_library(subgraph_bridge_reshape_op_mlu SRCS reshape_op.cc DEPS ${subgraph_bridge_deps_mlu}) lite_cc_library(subgraph_bridge_flatten_op_mlu SRCS flatten_op.cc DEPS ${subgraph_bridge_deps_mlu}) +lite_cc_library(subgraph_bridge_box_coder_op_mlu SRCS box_coder_op.cc DEPS ${subgraph_bridge_deps_mlu}) +lite_cc_library(subgraph_bridge_density_prior_box_op_mlu SRCS density_prior_box_op.cc DEPS ${subgraph_bridge_deps_mlu}) +lite_cc_library(subgraph_bridge_multiclass_nms_mlu SRCS multiclass_nms.cc multiclass_nms_api.cc multiclass_nms_impl.o DEPS ${subgraph_bridge_deps_mlu}) set(mlu_subgraph_bridges subgraph_bridge_registry subgraph_bridge_utility_mlu @@ -52,6 +55,9 @@ set(mlu_subgraph_bridges subgraph_bridge_squeeze_op_mlu subgraph_bridge_reshape_op_mlu subgraph_bridge_flatten_op_mlu + subgraph_bridge_box_coder_op_mlu + subgraph_bridge_density_prior_box_op_mlu + subgraph_bridge_multiclass_nms_mlu CACHE INTERNAL "mlu_subgraph_bridges") @@ -88,6 +94,9 @@ lite_cc_test(test_argmax_converter_mlu SRCS argmax_op_test.cc DEPS scope optimiz lite_cc_test(test_squeeze_converter_mlu SRCS squeeze_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) lite_cc_test(test_reshape_converter_mlu SRCS reshape_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) lite_cc_test(test_flatten_converter_mlu SRCS flatten_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) +lite_cc_test(test_box_coder_mlu SRCS box_coder_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) +lite_cc_test(test_density_prior_box_mlu SRCS density_prior_box_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} 
subgraph_compute_mlu subgraph_test_helper_mlu) +lite_cc_test(test_multiclass_nms_op_converter_mlu SRCS multiclass_nms_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) if (LITE_BUILD_EXTRA) lite_cc_test(test_norm_converter_mlu SRCS norm_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) lite_cc_test(test_lrn_converter_mlu SRCS lrn_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu) diff --git a/lite/kernels/mlu/bridges/box_coder_op.cc b/lite/kernels/mlu/bridges/box_coder_op.cc new file mode 100644 index 0000000000..ec1617cff2 --- /dev/null +++ b/lite/kernels/mlu/bridges/box_coder_op.cc @@ -0,0 +1,166 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "lite/kernels/mlu/bridges/graph.h" +#include "lite/kernels/mlu/bridges/utility.h" +#include "lite/kernels/npu/bridges/registry.h" + +namespace paddle { +namespace lite { +namespace subgraph { +namespace mlu { + +inline cnmlBoxCodeType_t GetBoxCodeType(const std::string& type) { + if (type == "encode_center_size") { + return cnmlBoxCodeType_t::Encode; + } + return cnmlBoxCodeType_t::Decode; +} + +int BoxCoderConverter(void* ctx, OpLite* op, KernelBase* kernel) { + CHECK(ctx != nullptr); + CHECK(op != nullptr); + auto graph = static_cast(ctx); + auto op_info = op->op_info(); + auto op_type = op_info->Type(); + auto scope = op->scope(); + VLOG(3) << "[MLU] Converting " + op_type + "..."; + + auto Prior_box_name = op_info->Input("PriorBox").front(); + auto Target_box_name = op_info->Input("TargetBox").front(); + auto Output_box_name = op_info->Output("OutputBox").front(); + std::vector input_arg_names = op_info->InputArgumentNames(); + if (std::find(input_arg_names.begin(), + input_arg_names.end(), + "PriorBoxVar") == input_arg_names.end()) { + LOG(FATAL) << "box coder mlu kernel expect PriorBoxVar input" << std::endl; + } + auto box_var_name = op_info->Input("PriorBoxVar").front(); + + auto* prior_box = scope->FindVar(Prior_box_name)->GetMutable(); + auto* target_box = scope->FindVar(Target_box_name)->GetMutable(); + auto* proposals = scope->FindVar(Output_box_name)->GetMutable(); + auto* box_var = scope->FindVar(box_var_name)->GetMutable(); + + auto code_type_str = op_info->GetAttr("code_type"); + auto box_normalized = op_info->GetAttr("box_normalized"); + int axis = -1; + if (op_info->HasAttr("axis")) { + axis = op_info->GetAttr("axis"); + } else { + LOG(FATAL) << "box coder mlu kernel expect axis" << std::endl; + } + + if (op_info->HasAttr("variance")) { + LOG(WARNING) << "box coder mlu kernel expect not have variance attr" + << std::endl; + VLOG(6) << "variance: "; + auto variance_vec = op_info->GetAttr>("variance"); + for (size_t i = 0; i < 
variance_vec.size(); i++) { + VLOG(6) << variance_vec[i]; + } + } + cnmlBoxCodeType_t code_type = GetBoxCodeType(code_type_str); + + int row = -1; + int len = -1; + int col = -1; + if (code_type == cnmlBoxCodeType_t::Encode) { + // target_box_shape = {row, len}; + // prior_box_shape = {col, len}; + // output_shape = {row, col, len}; + row = target_box->dims()[0]; + len = target_box->dims()[1]; + col = prior_box->dims()[0]; + } else if (code_type == cnmlBoxCodeType_t::Decode) { + // target_box_shape = {row,col,len}; + // prior_box_shape = {col, len} if axis == 0, or {row, len}; + // output_shape = {row, col, len}; + row = target_box->dims()[0]; + col = target_box->dims()[1]; + len = target_box->dims()[2]; + if (axis == 0) { + CHECK(prior_box->dims()[0] == col); + } else { + CHECK(prior_box->dims()[0] == row); + } + } + + bool float32_precision = false; + if (graph->FPType() == CNML_DATA_FLOAT32) { + float32_precision = true; + } + + // =================== DEBUG ====================== + VLOG(6) << "prior_box->dims(): " << prior_box->dims(); + VLOG(6) << "target_box->dims(): " << target_box->dims(); + VLOG(6) << "box_var->dims(): " << box_var->dims(); + VLOG(6) << "proposals->dims(): " << proposals->dims(); + VLOG(6) << "code_type_str: " << code_type_str; + VLOG(6) << "col: " << col; + VLOG(6) << "row: " << row; + VLOG(6) << "len: " << len; + VLOG(6) << "axis: " << axis; + VLOG(6) << "box_normalized :" << box_normalized; + VLOG(6) << "float32_precision: " << float32_precision; + VLOG(6) << "Prior_box_name: " << Prior_box_name; + VLOG(6) << "Target_box_name: " << Target_box_name; + VLOG(6) << "Output_box_name: " << Output_box_name; + VLOG(6) << "box_var_name: " << box_var_name; + + // =================== DEBUG END ====================== + auto target_box_tensor = graph->GetNode(Target_box_name); + auto prior_box_tensor = graph->GetNode(Prior_box_name); + auto box_var_tensor = graph->GetNode(box_var_name); + auto proposals_tensor = graph->AddNode(Output_box_name, + 
proposals->dims().Vectorize(), + CNML_TENSOR, + CNML_NCHW, + graph->FPType()); + cnmlPluginBoxCoderOpParam_t param; + CNML_CALL( + cnmlCreatePluginBoxCoderOpParam(¶m, + row, + col, + len, + axis, + box_normalized, + float32_precision, + code_type, + TargetWrapperMlu::MLUCoreVersion())); + cnmlBaseOp_t box_coder_op; + cnmlTensor_t input_tensors[3]; + input_tensors[0] = target_box_tensor->mlu_tensor(); + input_tensors[1] = prior_box_tensor->mlu_tensor(); + input_tensors[2] = box_var_tensor->mlu_tensor(); + cnmlTensor_t output_tensors[1]; + output_tensors[0] = proposals_tensor->mlu_tensor(); + CNML_CALL(cnmlCreatePluginBoxCoderOp( + &box_coder_op, param, input_tensors, output_tensors)); + + // CNML_CALL(cnmlSetOperationComputingLayout(box_coder_op, CNML_NCHW)); // + // important + graph->FuseOp(box_coder_op); + cnmlDestroyPluginBoxCoderOpParam(¶m); + return SUCCESS; +} + +} // namespace mlu +} // namespace subgraph +} // namespace lite +} // namespace paddle + +REGISTER_SUBGRAPH_BRIDGE(box_coder, + kMLU, + paddle::lite::subgraph::mlu::BoxCoderConverter); diff --git a/lite/kernels/mlu/bridges/box_coder_op_test.cc b/lite/kernels/mlu/bridges/box_coder_op_test.cc new file mode 100644 index 0000000000..625e3aaf6a --- /dev/null +++ b/lite/kernels/mlu/bridges/box_coder_op_test.cc @@ -0,0 +1,505 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "lite/operators/box_coder_op.h" +#include +#include +#include "lite/core/op_registry.h" +#include "lite/kernels/mlu/bridges/test_helper.h" +#include "lite/kernels/npu/bridges/registry.h" + +namespace paddle { +namespace lite { +namespace subgraph { +namespace mlu { + +void ToFile(Tensor *tensor, std::string file_name) { + int count = tensor->dims().production(); + auto data = tensor->mutable_data(); + std::ostringstream outs; + for (size_t i = 0; i < count; i++) { + outs << data[i] << std::endl; + } + std::ofstream of; + of.open(file_name, std::ios::out); + of << outs.str(); + of.close(); +} + +inline std::string BoxCodeTypeToStr(cnmlBoxCodeType_t code_type) { + if (code_type == cnmlBoxCodeType_t::Encode) { + return "encode_center_size"; + } else if (code_type == cnmlBoxCodeType_t::Decode) { + return "decode_center_size"; + } else { + CHECK(false); + } +} + +inline cnmlBoxCodeType_t GetBoxCodeType(const std::string &type) { + if (type == "encode_center_size") { + return cnmlBoxCodeType_t::Encode; + } else if (type == "decode_center_size") { + return cnmlBoxCodeType_t::Decode; + } else { + CHECK(false); + } +} + +void EncodeCenterSize(float *target_box_data, + float *prior_box_data, + float *prior_box_var_data, + std::vector target_box_shape, + std::vector prior_box_shape, + std::vector prior_box_var_shape, + const bool normalized, + const std::vector variance, + float *output) { + int64_t row = target_box_shape[0]; + int64_t col = prior_box_shape[0]; + int64_t len = prior_box_shape[1]; + + for (int64_t i = 0; i < row; ++i) { + for (int64_t j = 0; j < col; ++j) { + size_t offset = i * col * len + j * len; + float prior_box_width = prior_box_data[j * len + 2] - + prior_box_data[j * len] + (normalized == false); + float prior_box_height = prior_box_data[j * len + 3] - + prior_box_data[j * len + 1] + + (normalized == false); + float prior_box_center_x = prior_box_data[j * len] + prior_box_width / 2; + float prior_box_center_y = + prior_box_data[j * len + 1] 
+ prior_box_height / 2; + + float target_box_center_x = + (target_box_data[i * len + 2] + target_box_data[i * len]) / 2; + float target_box_center_y = + (target_box_data[i * len + 3] + target_box_data[i * len + 1]) / 2; + float target_box_width = target_box_data[i * len + 2] - + target_box_data[i * len] + (normalized == false); + float target_box_height = target_box_data[i * len + 3] - + target_box_data[i * len + 1] + + (normalized == false); + + output[offset] = + (target_box_center_x - prior_box_center_x) / prior_box_width; + output[offset + 1] = + (target_box_center_y - prior_box_center_y) / prior_box_height; + output[offset + 2] = + std::log(std::fabs(target_box_width / prior_box_width)); + output[offset + 3] = + std::log(std::fabs(target_box_height / prior_box_height)); + } + } + + if (prior_box_var_data) { + for (int64_t i = 0; i < row; ++i) { + for (int64_t j = 0; j < col; ++j) { + for (int k = 0; k < 4; ++k) { + size_t offset = i * col * len + j * len; + int prior_var_offset = j * len; + output[offset + k] /= prior_box_var_data[prior_var_offset + k]; + } + } + } + } else if (!(variance.empty())) { + for (int64_t i = 0; i < row; ++i) { + for (int64_t j = 0; j < col; ++j) { + for (int k = 0; k < 4; ++k) { + size_t offset = i * col * len + j * len; + output[offset + k] /= static_cast(variance[k]); + } + } + } + } +} + +template +void DecodeCenterSize(float *target_box_data, + float *prior_box_data, + float *prior_box_var_data, + std::vector target_box_shape, + std::vector prior_box_shape, + std::vector prior_box_var_shape, + const bool normalized, + std::vector variance, + float *output) { + int64_t row = target_box_shape[0]; + int64_t col = target_box_shape[1]; + int64_t len = target_box_shape[2]; + + for (int64_t i = 0; i < row; ++i) { + for (int64_t j = 0; j < col; ++j) { + float var_data[4] = {1., 1., 1., 1.}; + float *var_ptr = var_data; + size_t offset = i * col * len + j * len; + int prior_box_offset = axis == 0 ? 
j * len : i * len; + + float prior_box_width = prior_box_data[prior_box_offset + 2] - + prior_box_data[prior_box_offset] + + (normalized == false); + float prior_box_height = prior_box_data[prior_box_offset + 3] - + prior_box_data[prior_box_offset + 1] + + (normalized == false); + float prior_box_center_x = + prior_box_data[prior_box_offset] + prior_box_width / 2; + float prior_box_center_y = + prior_box_data[prior_box_offset + 1] + prior_box_height / 2; + + float target_box_center_x = 0, target_box_center_y = 0; + float target_box_width = 0, target_box_height = 0; + int prior_var_offset = axis == 0 ? j * len : i * len; + if (var_size == 2) { + std::memcpy( + var_ptr, prior_box_var_data + prior_var_offset, 4 * sizeof(float)); + } else if (var_size == 1) { + var_ptr = reinterpret_cast(variance.data()); + } + float box_var_x = *var_ptr; + float box_var_y = *(var_ptr + 1); + float box_var_w = *(var_ptr + 2); + float box_var_h = *(var_ptr + 3); + + target_box_center_x = + box_var_x * target_box_data[offset] * prior_box_width + + prior_box_center_x; + target_box_center_y = + box_var_y * target_box_data[offset + 1] * prior_box_height + + prior_box_center_y; + target_box_width = + std::exp(box_var_w * target_box_data[offset + 2]) * prior_box_width; + target_box_height = + std::exp(box_var_h * target_box_data[offset + 3]) * prior_box_height; + + output[offset] = target_box_center_x - target_box_width / 2; + output[offset + 1] = target_box_center_y - target_box_height / 2; + output[offset + 2] = + target_box_center_x + target_box_width / 2 - (normalized == false); + output[offset + 3] = + target_box_center_y + target_box_height / 2 - (normalized == false); + } + } +} + +void Compute(cnmlBoxCodeType_t code_type, + lite::Tensor *prior_box, + lite::Tensor *target_box, + lite::Tensor *box_var, + lite::Tensor *output_box, + std::vector variance, + bool normalized, + int axis) { + // BoxCodeType code_type = BoxCodeType::kDecodeCenterSize; + // std::vector prior_box_shape = {512, 
4}; + // std::vector prior_box_var_shape = prior_box_shape; + + // std::vector target_box_shape; + // std::vector output_shape; + // if (code_type == BoxCodeType::kEncodeCenterSize) { + // target_box_shape = {81, 4}; + // output_shape = {81, 512, 4}; + // } else { + // target_box_shape = {81, 512, 4}; + // output_shape = {81, 512, 4}; + // } + + auto *prior_box_data = prior_box->mutable_data(); + auto *prior_box_var_data = box_var->mutable_data(); + auto *target_box_data = target_box->mutable_data(); + auto *output_data = output_box->mutable_data(); + + auto target_box_shape = target_box->dims().Vectorize(); + auto prior_box_shape = prior_box->dims().Vectorize(); + auto prior_box_var_shape = box_var->dims().Vectorize(); + if (code_type == cnmlBoxCodeType_t::Encode) { + EncodeCenterSize(target_box_data, + prior_box_data, + prior_box_var_data, + target_box_shape, + prior_box_shape, + prior_box_var_shape, + normalized, + variance, + output_data); + } else if (code_type == cnmlBoxCodeType_t::Decode) { + if (prior_box_var_data) { + LOG(INFO) << "prior_box_var_data not null" << std::endl; + if (axis == 0) { + LOG(INFO) << "use DecodeCenterSize<1, 2> axis == 0" << std::endl; + DecodeCenterSize<0, 2>(target_box_data, + prior_box_data, + prior_box_var_data, + target_box_shape, + prior_box_shape, + prior_box_var_shape, + normalized, + variance, + output_data); + } else { + LOG(INFO) << "use DecodeCenterSize<1, 2> axis == 1" << std::endl; + DecodeCenterSize<1, 2>(target_box_data, + prior_box_data, + prior_box_var_data, + target_box_shape, + prior_box_shape, + prior_box_var_shape, + normalized, + variance, + output_data); + } + } else if (!(variance.empty())) { + LOG(INFO) << "prior_box_var_data null" << std::endl; + if (axis == 0) { + DecodeCenterSize<0, 1>(target_box_data, + prior_box_data, + prior_box_var_data, + target_box_shape, + prior_box_shape, + prior_box_var_shape, + normalized, + variance, + output_data); + } else { + DecodeCenterSize<1, 1>(target_box_data, + 
prior_box_data, + prior_box_var_data, + target_box_shape, + prior_box_shape, + prior_box_var_shape, + normalized, + variance, + output_data); + } + } else { + if (axis == 0) { + DecodeCenterSize<0, 0>(target_box_data, + prior_box_data, + prior_box_var_data, + target_box_shape, + prior_box_shape, + prior_box_var_shape, + normalized, + variance, + output_data); + } else { + DecodeCenterSize<1, 0>(target_box_data, + prior_box_data, + prior_box_var_data, + target_box_shape, + prior_box_shape, + prior_box_var_shape, + normalized, + variance, + output_data); + } + } + } +} + +void box_coder_ref(const std::shared_ptr op) { + Scope *scope = op->scope(); + const OpInfo *op_info = op->op_info(); + auto prior_box = + scope->FindVar(op_info->Input("PriorBox").front())->GetMutable(); + auto target_box = + scope->FindVar(op_info->Input("TargetBox").front())->GetMutable(); + auto box_var = scope->FindVar(op_info->Input("PriorBoxVar").front()) + ->GetMutable(); + auto output_box = scope->FindVar(op_info->Output("OutputBox").front()) + ->GetMutable(); + + auto code_type_str = op_info->GetAttr("code_type"); + auto box_normalized = op_info->GetAttr("box_normalized"); + auto axis = op_info->GetAttr("axis"); + auto code_type = GetBoxCodeType(code_type_str); + std::vector variance; + if (op_info->HasAttr("variance")) { + variance = op_info->GetAttr>("variance"); + } + Compute(code_type, + prior_box, + target_box, + box_var, + output_box, + variance, + box_normalized, + axis); +} + +void test_box_coder(int row, + int col, + int len, + int axis, + cnmlBoxCodeType_t code_type, + bool box_normalized) { + // prepare input&output variables + Scope scope; + std::string prior_box_var_name("PriorBox"); + std::string taget_box_var_name("TargetBox"); + std::string output_box_var_name("OutputBox"); + std::string box_var_var_name("PriorBoxVar"); + std::string output_ref_var_name("OutputBox_ref"); + auto *prior_box = scope.Var(prior_box_var_name)->GetMutable(); + auto *target_box = 
scope.Var(taget_box_var_name)->GetMutable(); + auto *box_var = scope.Var(box_var_var_name)->GetMutable(); + auto *output_box = scope.Var(output_box_var_name)->GetMutable(); + auto *output_box_ref = scope.Var(output_ref_var_name)->GetMutable(); + + if (code_type == cnmlBoxCodeType_t::Encode) { + // target_box_shape = {row, len}; + // prior_box_shape = {col, len}; + // output_shape = {row, col, len}; + target_box->Resize({row, len}); + prior_box->Resize({col, len}); + box_var->Resize({col, len}); + } else if (code_type == cnmlBoxCodeType_t::Decode) { + // target_box_shape = {row,col,len}; + // prior_box_shape = {col, len} if axis == 0, or {row, len}; + // output_shape = {row, col, len}; + target_box->Resize({row, col, len}); + if (axis == 0) { + prior_box->Resize({col, len}); + box_var->Resize({col, len}); + } else if (axis == 1) { + prior_box->Resize({row, len}); + box_var->Resize({row, len}); + } else { + LOG(FATAL) << "axis should in {0,1} ,but got " << axis << std::endl; + } + } + + // initialize input&output data + // FillTensor(prior_box); + // FillTensor(target_box); + // FillTensor(box_var); // ?????? 
+ for (int i = 0; i < prior_box->dims().production(); i++) { + prior_box->mutable_data()[i] = static_cast((i % 8) + 1); + } + for (int i = 0; i < target_box->dims().production(); i++) { + target_box->mutable_data()[i] = static_cast((i % 8) + 1); + } + for (int i = 0; i < box_var->dims().production() / 4; i++) { + box_var->mutable_data()[i * 4 + 0] = 0.1; + box_var->mutable_data()[i * 4 + 1] = 0.1; + box_var->mutable_data()[i * 4 + 2] = 0.2; + box_var->mutable_data()[i * 4 + 3] = 0.2; + } + + LOG(INFO) << "prior_box count : " << prior_box->dims().production(); + LOG(INFO) << "target_box count : " << target_box->dims().production(); + LOG(INFO) << "box_var count : " << box_var->dims().production(); + + // ToFile(*prior_box, "prior_box.txt"); + // ToFile(*box_var, "box_var.txt"); + // ToFile(*target_box, "target_box.txt"); + + // initialize op desc + cpp::OpDesc opdesc; + opdesc.SetType("box_coder"); + opdesc.SetInput("PriorBox", {prior_box_var_name}); + opdesc.SetInput("TargetBox", {taget_box_var_name}); + opdesc.SetInput("PriorBoxVar", {box_var_var_name}); + opdesc.SetOutput("OutputBox", {output_box_var_name}); + + opdesc.SetAttr("axis", axis); + opdesc.SetAttr("box_normalized", box_normalized); + opdesc.SetAttr("code_type", BoxCodeTypeToStr(code_type)); + + // trans inputs + Tensor prior_box_trans; + Tensor box_var_trans; + Tensor target_box_trans; + prior_box_trans.Resize(prior_box->dims()); + box_var_trans.Resize(box_var->dims()); + target_box_trans.Resize(target_box->dims()); + + auto op = CreateOp(opdesc, &scope); + box_coder_ref(op); + output_box_ref->CopyDataFrom(*output_box); + + // transpose(prior_box->mutable_data(), + // prior_box_trans.mutable_data(), + // {static_cast(prior_box->dims()[0]), + // static_cast(prior_box->dims()[1]), + // 1, + // 1}, + // {0, 2, 3, 1}); + + // row col len 1 --> row len 1 col + transpose(target_box->mutable_data(), + target_box_trans.mutable_data(), + { + static_cast(target_box->dims()[0]), + 
static_cast(target_box->dims()[1]), + static_cast(target_box->dims()[2]), + 1, + }, + {0, 2, 3, 1}); + + // transpose(box_var->mutable_data(), + // box_var_trans.mutable_data(), + // {static_cast(box_var->dims()[0]), + // static_cast(box_var->dims()[0]), + // 1, + // 1}, + // {0, 2, 3, 1}); + + target_box->CopyDataFrom(target_box_trans); + + LaunchOp(op, + {prior_box_var_name, taget_box_var_name, box_var_var_name}, + {output_box_var_name}); + + // execute reference implementation and save to output tensor('out') + + // compare results + auto *output_data = output_box->mutable_data(); + auto *output_ref_data = output_box_ref->mutable_data(); + Tensor output_trans; + output_trans.Resize(output_box->dims()); + // row * len * 1 * col -> row * col * len * 1 + transpose(output_data, + output_trans.mutable_data(), + {static_cast(output_box->dims()[0]), + static_cast(output_box->dims()[2]), + 1, + static_cast(output_box->dims()[1])}, + {0, 3, 1, 2}); + + output_data = output_trans.mutable_data(); + // ToFile(*output_box, "output_mlu_before_trans.txt"); + // ToFile(&output_trans, "output_mlu.txt"); + // ToFile(output_box_ref, "output_cpu.txt"); + for (int i = 0; i < output_box->dims().production(); i++) { + VLOG(6) << i; + EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-2); + } +} + +TEST(MLUBridges, prior_density_box) { + int row = 1; + int col = 20560; + int len = 4; + int axis = 0; + cnmlBoxCodeType_t code_type = cnmlBoxCodeType_t::Decode; + bool box_normalized = true; + test_box_coder(row, col, len, axis, code_type, box_normalized); +} + +} // namespace mlu +} // namespace subgraph +} // namespace lite +} // namespace paddle + +USE_SUBGRAPH_BRIDGE(box_coder, kMLU); diff --git a/lite/kernels/mlu/bridges/density_prior_box_op.cc b/lite/kernels/mlu/bridges/density_prior_box_op.cc new file mode 100644 index 0000000000..0634114a04 --- /dev/null +++ b/lite/kernels/mlu/bridges/density_prior_box_op.cc @@ -0,0 +1,231 @@ +// Copyright (c) 2019 PaddlePaddle Authors. 
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/kernels/mlu/bridges/graph.h" +#include "lite/kernels/mlu/bridges/utility.h" +#include "lite/kernels/npu/bridges/registry.h" + +namespace paddle { +namespace lite { +namespace subgraph { +namespace mlu { + +void inferShape(Tensor* input, + Tensor* boxes, + Tensor* variances, + std::vector fixed_ratios, + std::vector densities) { + auto feat_height = input->dims()[2]; + auto feat_width = input->dims()[3]; + + int num_priors = 0; + for (size_t i = 0; i < densities.size(); ++i) { + num_priors += (fixed_ratios.size()) * (pow(densities[i], 2)); + } + + std::vector boxes_shape = {feat_width, feat_height, num_priors, 4}; + std::vector vars_shape = boxes_shape; + boxes->Resize(boxes_shape); + variances->Resize(vars_shape); +} + +int DensityPriorBoxConverter(void* ctx, OpLite* op, KernelBase* kernel) { + CHECK(ctx != nullptr); + CHECK(op != nullptr); + auto graph = static_cast(ctx); + auto op_info = op->op_info(); + auto op_type = op_info->Type(); + auto scope = op->scope(); + VLOG(3) << "[MLU] Converting " + op_type + "..."; + + auto input_name = op_info->Input("Input").front(); + auto image_name = op_info->Input("Image").front(); + auto boxes_name = op_info->Output("Boxes").front(); + auto variances_name = op_info->Output("Variances").front(); + + auto input_var = scope->FindVar(input_name)->GetMutable(); + auto image_var = scope->FindVar(image_name)->GetMutable(); + auto 
boxes_var = scope->FindVar(boxes_name)->GetMutable(); + auto variances_var = scope->FindVar(variances_name)->GetMutable(); + + auto clip = op_info->GetAttr("clip"); + auto fixed_sizes = op_info->GetAttr>("fixed_sizes"); + auto fixed_ratios = op_info->GetAttr>("fixed_ratios"); + auto variances_ = op_info->GetAttr>("variances"); + auto densities = op_info->GetAttr>("densities"); + auto offset = op_info->GetAttr("offset"); + auto step_w = op_info->GetAttr("step_w"); + auto step_h = op_info->GetAttr("step_h"); + + inferShape(input_var, boxes_var, variances_var, fixed_ratios, densities); + + auto input_dims = input_var->dims(); + auto image_dims = image_var->dims(); + auto boxes_dims = boxes_var->dims(); + auto variances_dims = variances_var->dims(); + + auto feat_tensor = graph->GetNode(input_name); + auto image_tensor = graph->GetNode(image_name); + + auto boxes_tensor_trans = graph->AddNode(boxes_name + ".trans.boxes", + boxes_dims.Vectorize(), + CNML_TENSOR, + CNML_NHWC, + graph->FPType()); + auto variances_tensor_trans = graph->AddNode(variances_name + ".trans.vars", + variances_dims.Vectorize(), + CNML_TENSOR, + CNML_NHWC, + graph->FPType()); + + bool float32_precision = false; + if (graph->FPType() == CNML_DATA_FLOAT32) { + float32_precision = true; + } + + // ==================== DEBUG ================== + + VLOG(6) << "input_name: " << input_name; + VLOG(6) << "image_name: " << image_name; + VLOG(6) << "boxes_name: " << boxes_name; + VLOG(6) << "variances_name: " << variances_name; + VLOG(6) << "input_dims : " << input_dims; + VLOG(6) << "image_dims : " << image_dims; + VLOG(6) << "boxes_dims : " << boxes_dims; + VLOG(6) << "variances_dims : " << variances_dims; + VLOG(6) << "clip : " << clip; + VLOG(6) << "fixed_sizes : "; + for (auto tmp : fixed_sizes) { + VLOG(6) << tmp; + } + + VLOG(6) << "fixed_ratios : "; + for (auto tmp : fixed_ratios) { + VLOG(6) << tmp; + } + VLOG(6) << "variances_ : "; + for (auto tmp : variances_) { + VLOG(6) << tmp; + } + VLOG(6) << 
"densities : "; + for (auto tmp : densities) { + VLOG(6) << tmp; + } + VLOG(6) << "offset : " << offset; + VLOG(6) << "clip : " << clip; + + int cnml_boxes_shape[4]; + CNML_CALL( + cnmlGetTensorShape(boxes_tensor_trans->mlu_tensor(), cnml_boxes_shape)); + VLOG(6) << "cnml_boxes_shape"; + for (size_t i = 0; i < 4; i++) { + VLOG(6) << cnml_boxes_shape[i]; + } + int cnml_vars_shape[4]; + VLOG(6) << "cnml_vars_shape"; + CNML_CALL(cnmlGetTensorShape(variances_tensor_trans->mlu_tensor(), + cnml_vars_shape)); + for (size_t i = 0; i < 4; i++) { + VLOG(6) << cnml_vars_shape[i]; + } + + int feat_width = input_dims[3]; + int feat_height = input_dims[2]; + int image_width = image_dims[3]; + int image_height = image_dims[2]; + // ==================== DEBUG END ================== + cnmlPluginDensityPriorBoxOpParam_t op_param; + cnmlCreatePluginDensityPriorBoxOpParam(&op_param, + feat_width, + feat_height, + image_width, + image_height, + variances_.data(), + variances_.size(), + densities.data(), + densities.size(), + fixed_sizes.data(), + fixed_sizes.size(), + fixed_ratios.data(), + fixed_ratios.size(), + clip, + step_w, + step_h, + offset, + float32_precision, + TargetWrapperMlu::MLUCoreVersion()); + + cnmlTensor_t input_tensors[2]; + input_tensors[0] = feat_tensor->mlu_tensor(); + input_tensors[1] = image_tensor->mlu_tensor(); + cnmlTensor_t output_tensors[2]; + output_tensors[0] = boxes_tensor_trans->mlu_tensor(); + output_tensors[1] = variances_tensor_trans->mlu_tensor(); + cnmlBaseOp_t density_prior_box_op; + CNML_CALL(cnmlCreatePluginDensityPriorBoxOp( + &density_prior_box_op, op_param, input_tensors, output_tensors)); + + std::vector nchw_to_nhwc_axis = {0, 2, 3, 1}; + // ============== Boxes Trans ======================= + auto boxes_tensor = graph->AddNode(boxes_name, + boxes_dims.Vectorize(), + CNML_TENSOR, + CNML_NCHW, + graph->FPType()); + cnmlBaseOp_t trans_boxes_op{nullptr}; + cnmlNdTransposeOpParam_t trans_boxes_param{nullptr}; + 
CNML_CALL(cnmlCreateNdTransposeOpParam( + &trans_boxes_param, nchw_to_nhwc_axis.data(), nchw_to_nhwc_axis.size())); + CNML_CALL(cnmlCreateNdTransposeProOp(&trans_boxes_op, + boxes_tensor_trans->mlu_tensor(), + boxes_tensor->mlu_tensor(), + trans_boxes_param)); + // ============== Boxes Trans End =================== + + // ============== Vars Trans ======================= + auto variances_tensor = graph->AddNode(variances_name, + variances_dims.Vectorize(), + CNML_TENSOR, + CNML_NCHW, + graph->FPType()); + cnmlBaseOp_t trans_vars_op{nullptr}; + cnmlNdTransposeOpParam_t trans_vars_param{nullptr}; + CNML_CALL(cnmlCreateNdTransposeOpParam( + &trans_vars_param, nchw_to_nhwc_axis.data(), nchw_to_nhwc_axis.size())); + CNML_CALL(cnmlCreateNdTransposeProOp(&trans_vars_op, + variances_tensor_trans->mlu_tensor(), + variances_tensor->mlu_tensor(), + trans_vars_param)); + // ============== Vars Trans End =================== + + // cnmlSetOperationComputingLayout(density_prior_box_op,CNML_NCHW); + // cnmlSetTensorComputingLayoutInOperation( + // density_prior_box_op, boxes_tensor->mlu_tensor(), CNML_NCHW); + // cnmlSetTensorComputingLayoutInOperation( + // density_prior_box_op, variances_tensor->mlu_tensor(), CNML_NCHW); + graph->FuseOp(trans_boxes_op); + graph->FuseOp(density_prior_box_op); + graph->FuseOp(trans_vars_op); + // cnmlDestroyPluginDensityPriorBoxOpParam(&op_param); + return SUCCESS; +} + +} // namespace mlu +} // namespace subgraph +} // namespace lite +} // namespace paddle + +REGISTER_SUBGRAPH_BRIDGE(density_prior_box, + kMLU, + paddle::lite::subgraph::mlu::DensityPriorBoxConverter); diff --git a/lite/kernels/mlu/bridges/density_prior_box_op_test.cc b/lite/kernels/mlu/bridges/density_prior_box_op_test.cc new file mode 100644 index 0000000000..88b9a6c53d --- /dev/null +++ b/lite/kernels/mlu/bridges/density_prior_box_op_test.cc @@ -0,0 +1,302 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/operators/density_prior_box_op.h" +#include +#include +#include "lite/core/op_registry.h" +#include "lite/kernels/mlu/bridges/test_helper.h" +#include "lite/kernels/npu/bridges/registry.h" + +namespace paddle { +namespace lite { +namespace subgraph { +namespace mlu { + +void inferShape_(Tensor* input, + Tensor* boxes, + Tensor* variances, + std::vector fixed_ratios, + std::vector densities) { + auto feat_height = input->dims()[2]; + auto feat_width = input->dims()[3]; + + int num_priors = 0; + for (size_t i = 0; i < densities.size(); ++i) { + num_priors += (fixed_ratios.size()) * (pow(densities[i], 2)); + } + + std::vector boxes_shape = {feat_width, feat_height, num_priors, 4}; + std::vector vars_shape = boxes_shape; + boxes->Resize(boxes_shape); + variances->Resize(vars_shape); +} + +void prior_density_box_ref( + const std::shared_ptr op) { + Scope* scope = op->scope(); + const OpInfo* op_info = op->op_info(); + auto input = + scope->FindVar(op_info->Input("Input").front())->GetMutable(); + auto image = + scope->FindVar(op_info->Input("Image").front())->GetMutable(); + auto boxes_tensor = + scope->FindVar(op_info->Output("Boxes").front())->GetMutable(); + auto variances = scope->FindVar(op_info->Output("Variances").front()) + ->GetMutable(); + auto clip = op_info->GetAttr("clip"); + auto fixed_sizes = op_info->GetAttr>("fixed_sizes"); + auto fixed_ratios = op_info->GetAttr>("fixed_ratios"); 
+ auto variances_ = op_info->GetAttr>("variances"); + auto densities = op_info->GetAttr>("densities"); + auto offset = op_info->GetAttr("offset"); + auto step_w = op_info->GetAttr("step_w"); + auto step_h = op_info->GetAttr("step_h"); + + std::vector input_shape = {128, 128}; + std::vector image_shape = {256, 256}; + int num_priors = 0; + for (size_t i = 0; i < densities.size(); ++i) { + num_priors += (fixed_ratios.size()) * (pow(densities[i], 2)); + } + + int boxes_count = boxes_tensor->dims().production(); + + float* boxes = boxes_tensor->mutable_data(); + float* vars = variances->mutable_data(); + + auto img_width = image->dims()[3]; + auto img_height = image->dims()[2]; + + auto feature_width = input->dims()[3]; + auto feature_height = input->dims()[2]; + + float step_width, step_height; + if (step_w == 0 || step_h == 0) { + step_width = static_cast(img_width) / feature_width; + step_height = static_cast(img_height) / feature_height; + } else { + step_width = step_w; + step_height = step_h; + } + + int step_average = static_cast((step_width + step_height) * 0.5); + + std::vector sqrt_fixed_ratios; + for (size_t i = 0; i < fixed_ratios.size(); i++) { + sqrt_fixed_ratios.push_back(sqrt(fixed_ratios[i])); + } + + for (int h = 0; h < feature_height; ++h) { + for (int w = 0; w < feature_width; ++w) { + float center_x = (w + offset) * step_width; + float center_y = (h + offset) * step_height; + int idx = 0; + // Generate density prior boxes with fixed sizes. + for (size_t s = 0; s < fixed_sizes.size(); ++s) { + auto fixed_size = fixed_sizes[s]; + int density = densities[s]; + int shift = step_average / density; + // Generate density prior boxes with fixed ratios. + for (size_t r = 0; r < fixed_ratios.size(); ++r) { + float box_width_ratio = fixed_size * sqrt_fixed_ratios[r]; + float box_height_ratio = fixed_size / sqrt_fixed_ratios[r]; + float density_center_x = center_x - step_average / 2. + shift / 2.; + float density_center_y = center_y - step_average / 2. 
+ shift / 2.; + for (int di = 0; di < density; ++di) { + for (int dj = 0; dj < density; ++dj) { + float center_x_temp = density_center_x + dj * shift; + float center_y_temp = density_center_y + di * shift; + boxes[h * feature_width * num_priors * 4 + w * num_priors * 4 + + idx * 4 + 0] = + std::max((center_x_temp - box_width_ratio / 2.) / img_width, + 0.); + boxes[h * feature_width * num_priors * 4 + w * num_priors * 4 + + idx * 4 + 1] = + std::max((center_y_temp - box_height_ratio / 2.) / img_height, + 0.); + boxes[h * feature_width * num_priors * 4 + w * num_priors * 4 + + idx * 4 + 2] = + std::min((center_x_temp + box_width_ratio / 2.) / img_width, + 1.); + boxes[h * feature_width * num_priors * 4 + w * num_priors * 4 + + idx * 4 + 3] = + std::min((center_y_temp + box_height_ratio / 2.) / img_height, + 1.); + idx++; + } + } + } + } + } + } + if (clip) { + std::transform(boxes, boxes + boxes_count, boxes, [](float v) -> float { + return std::min(std::max(v, 0.), 1.); + }); + } + int box_num = feature_height * feature_width * num_priors; + + for (int i = 0; i < box_num; ++i) { + for (size_t j = 0; j < variances_.size(); ++j) { + vars[i * variances_.size() + j] = variances_[j]; + } + } +} + +void test_prior_density_box(int feat_h, + int feat_w, + int img_h, + int img_w, + bool clip, + std::vector fixed_sizes, + std::vector fixed_ratios, + std::vector variances_, + std::vector densities, + float step_w, + float step_h, + float offset) { + // prepare input&output variables + Scope scope; + std::string input_var_name("Input"); + std::string image_var_name("Image"); + std::string boxes_var_name("Boxes"); + std::string variances_var_name("Variances"); + std::string boxes_ref_var_name("Boxes_ref"); + std::string variances_ref_var_name("Variances_ref"); + auto* input = scope.Var(input_var_name)->GetMutable(); + auto* image = scope.Var(image_var_name)->GetMutable(); + auto* boxes = scope.Var(boxes_var_name)->GetMutable(); + auto* variances = 
scope.Var(variances_var_name)->GetMutable(); + auto* boxes_ref = scope.Var(boxes_ref_var_name)->GetMutable(); + auto* variances_ref = scope.Var(variances_ref_var_name)->GetMutable(); + input->Resize({1, 1, feat_h, feat_w}); + image->Resize({1, 1, img_h, img_w}); + + // initialize input&output data + FillTensor(input); + FillTensor(image); + + // initialize op desc + cpp::OpDesc opdesc; + opdesc.SetType("density_prior_box"); + opdesc.SetInput("Input", {input_var_name}); + opdesc.SetInput("Image", {image_var_name}); + opdesc.SetOutput("Boxes", {boxes_var_name}); + opdesc.SetOutput("Variances", {variances_var_name}); + + opdesc.SetAttr("fixed_sizes", fixed_sizes); + opdesc.SetAttr("fixed_ratios", fixed_ratios); + opdesc.SetAttr("variances", variances_); + opdesc.SetAttr("densities", densities); + opdesc.SetAttr("offset", offset); + opdesc.SetAttr("clip", clip); + opdesc.SetAttr("step_w", step_w); + opdesc.SetAttr("step_h", step_h); + + inferShape_(input, boxes, variances, fixed_ratios, densities); + inferShape_(input, boxes_ref, variances_ref, fixed_ratios, densities); + + auto op = CreateOp(opdesc, &scope); + prior_density_box_ref(op); + boxes_ref->CopyDataFrom(*boxes); + variances_ref->CopyDataFrom(*variances); + LaunchOp(op, + {input_var_name, image_var_name}, + {boxes_var_name, variances_var_name}); + + // execute reference implementation and save to output tensor('out') + + // ===================== Trans From NHWC to NCHW ==================== + Tensor boxes_trans; + boxes_trans.Resize(boxes->dims().Vectorize()); + transpose(boxes->mutable_data(), + boxes_trans.mutable_data(), + {static_cast(boxes->dims()[0]), + static_cast(boxes->dims()[2]), + static_cast(boxes->dims()[3]), + static_cast(boxes->dims()[1])}, + {0, 3, 1, 2}); + boxes->CopyDataFrom(boxes_trans); + Tensor vars_trans; + vars_trans.Resize(variances->dims().Vectorize()); + transpose(variances->mutable_data(), + vars_trans.mutable_data(), + {static_cast(variances->dims()[0]), + 
static_cast(variances->dims()[2]), + static_cast(variances->dims()[3]), + static_cast(variances->dims()[1])}, + {0, 3, 1, 2}); + variances->CopyDataFrom(vars_trans); + + // compare results + auto* boxes_data = boxes->mutable_data(); + auto* boxes_ref_data = boxes_ref->mutable_data(); + auto* variances_data = variances->mutable_data(); + auto* variances_ref_data = variances_ref->mutable_data(); + + // ToFile(*variances, "var_mlu.txt"); + // ToFile(*variances_ref, "var_cpu.txt"); + // ToFile(*boxes, "box_mlu.txt"); + // ToFile(*boxes_ref, "box_cpu.txt"); + for (int i = 0; i < variances->dims().production(); i++) { + VLOG(6) << i; + EXPECT_NEAR(variances_data[i], variances_ref_data[i], 1e-5); + } + + for (int i = 0; i < boxes->dims().production(); i++) { + VLOG(6) << i; + EXPECT_NEAR(boxes_data[i], boxes_ref_data[i], 1e-5); + } +} + +TEST(MLUBridges, prior_density_box) { + // std::vector input_shape = {128, 128}; + // std::vector image_shape = {256, 256}; + // std::vector fixed_sizes = {8 * 16, 16 * 16, 32 * 16}; + // std::vector fixed_sizes = {8, 16, 32}; + // std::vector fixed_ratios = {0.5, 1, 2}; + // std::vector densities = {1, 1, 1}; + + std::vector input_shape = {16, 16}; + std::vector image_shape = {32, 32}; + std::vector fixed_sizes = {8, 16, 32}; + std::vector fixed_ratios = {0.5, 1, 2}; + std::vector densities = {1, 1, 1}; + std::vector variances = {0.1, 0.1, 0.2, 0.2}; + bool clip = true; + float offset = 0.5; + float step_h = 0; + float step_w = 0; + + test_prior_density_box(input_shape[1], + input_shape[0], + image_shape[1], + image_shape[0], + clip, + fixed_sizes, + fixed_ratios, + variances, + densities, + offset, + step_h, + step_w); +} + +} // namespace mlu +} // namespace subgraph +} // namespace lite +} // namespace paddle + +USE_SUBGRAPH_BRIDGE(density_prior_box, kMLU); diff --git a/lite/kernels/mlu/bridges/multiclass_nms.cc b/lite/kernels/mlu/bridges/multiclass_nms.cc new file mode 100644 index 0000000000..a749d9b392 --- /dev/null +++ 
b/lite/kernels/mlu/bridges/multiclass_nms.cc @@ -0,0 +1,250 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "lite/kernels/mlu/bridges/graph.h" +#include "lite/kernels/mlu/bridges/multiclass_nms_api.h" +#include "lite/kernels/mlu/bridges/utility.h" +#include "lite/kernels/npu/bridges/registry.h" +#include "lite/operators/multiclass_nms_op.h" + +namespace paddle { +namespace lite { +namespace subgraph { +namespace mlu { + +int MulticlassNmsConverter(void* ctx, OpLite* op, KernelBase* kernel) { + CHECK(ctx != nullptr); + CHECK(op != nullptr); + auto graph = static_cast(ctx); + auto op_info = op->op_info(); + auto op_type = op_info->Type(); + auto scope = op->scope(); + VLOG(3) << "[MLU] Converting " + op_type + "..."; + + auto bboxes_name = op_info->Input("BBoxes").front(); + auto scores_name = op_info->Input("Scores").front(); + auto out_name = op_info->Output("Out").front(); + + auto* bboxes = scope->FindTensor(bboxes_name); + auto* scores = scope->FindTensor(scores_name); + auto* out = scope->FindTensor(out_name); + auto background_label = op_info->GetAttr("background_label"); + auto keep_top_k = op_info->GetAttr("keep_top_k"); + auto nms_top_k = op_info->GetAttr("nms_top_k"); + auto score_threshold = op_info->GetAttr("score_threshold"); + auto nms_threshold = op_info->GetAttr("nms_threshold"); + auto nms_eta = op_info->GetAttr("nms_eta"); + bool normalized = false; 
+ if (op_info->HasAttr("normalized")) { + normalized = op_info->GetAttr("normalized"); + } + + auto bboxes_dims = bboxes->dims(); + auto scores_dims = scores->dims(); + + auto batch_size = bboxes->dims()[0]; + auto num_boxes = bboxes->dims()[1]; + auto class_num = scores->dims()[1]; + keep_top_k = keep_top_k == -1 ? num_boxes : keep_top_k; + + // ????????????? + int box_size = 4; + std::vector outs_shape = {batch_size, keep_top_k, box_size + 2}; + const_cast(out)->Resize(outs_shape); + auto out_dims = out->dims(); + + // LOG(WARNING) << "CORE NUM SHOULD BE 4!!!!" << std::endl; + + int core_num = TargetWrapperMlu::MLUCoreNumber(); + + // expect {batch_size, num_boxes, box_size} in compute + // while {batch_size, box_size,num_boxes} on mlu + // while {batch_size, num_boxes, box_size} on cpu + // so mlu data_flow and mlu compute layout mismatch, should set bboxes_tensor + // as NCHW + auto bboxes_tensor = graph->GetNode(bboxes_name); + // expect {batch_size, class_num, num_boxes} in compute + // while {batch_size, num_boxes,class_num } on mlu + // while {batch_size, class_num, num_boxes} on cpu + // so mlu data_flow and mlu compute layout mismatch, should set scores_tensor + // as NCHW + auto scores_tensor = graph->GetNode(scores_name); + // expect batch_size, keep_top_k, box_size + 2 in compute + // while batch_size, box_size + 2, keep_top_k on mlu + // while batch_size, keep_top_k, box_size + 2 on cpu + // so mlu data_flow and mlu compute layout mismatch, should set out_tensor as + auto out_tensor = graph->AddNode( + out_name, out_dims.Vectorize(), CNML_TENSOR, CNML_NCHW, graph->FPType()); + + // trans bboxes {batch_size, num_boxes, box_size} + auto bboxes_trans_tensor = graph->AddNode(bboxes_name + ".trans.bboxes", + bboxes_dims.Vectorize(), + CNML_TENSOR, + CNML_NCHW, + graph->FPType(), + CNML_NCHW); + // trans scores {batch_size, class_num, num_boxes} + auto scores_trans_tensor = graph->AddNode(bboxes_name + ".trans.scores", + scores_dims.Vectorize(), + 
CNML_TENSOR, + CNML_NCHW, + graph->FPType(), + CNML_NCHW); + // trans out {batch_size, keep_top_k, box_size + 2} + auto out_trans_tensor = graph->AddNode(out_name + ".trans.out", + out_dims.Vectorize(), + CNML_TENSOR, + CNML_NCHW, + graph->FPType(), + CNML_NCHW); + + std::string out_num_name = "nms_out_num"; + auto* out_num = scope->NewTensor(out_num_name); + std::vector out_num_shape = {batch_size, 1}; + out_num->Resize(out_num_shape); + auto num_outs_tensor = graph->AddNode( + out_num_name, out_num_shape, CNML_TENSOR, CNML_NCHW, graph->FPType()); + bool float_precision = false; + if (graph->FPType() == CNML_DATA_FLOAT32) { + float_precision = true; + } + int64_t workspace_mem_size = + 4 * std::min(static_cast(batch_size), core_num) * + (14 * num_boxes + 8 * class_num * num_boxes); + int64_t workspace_fp_size = workspace_mem_size / 4; + if (!float_precision) { + // when run as fp16, mlu size will be half of cpu size, so workspace_fp_size + // should be double + workspace_fp_size = workspace_mem_size / 2; + } + std::vector workspace_shape = {workspace_fp_size}; + std::string nms_workspace_name = + "nms_workspace"; // expect only one nms in same model + auto workspace_tensor = graph->AddNode(nms_workspace_name, + workspace_shape, + CNML_CONST, + CNML_NCHW, + graph->FPType()); + std::vector workspace_cpu(workspace_shape[0]); + // void* work_space_ = nullptr; + // cnrtMalloc(&work_space_, workspace_shape[0]); + VLOG(6) << "workspace_shape :" << workspace_shape[0]; + // VLOG(6) << "workspace_shape mlu ptr :" + // << reinterpret_cast(work_space_); + + // =================== Bboxes Trans ============================ + std::vector bboxes_axis = {0, 2, 1}; + cnmlBaseOp_t bboxes_trans_op{nullptr}; + cnmlNdTransposeOpParam_t bboxes_trans_param{nullptr}; + CNML_CALL(cnmlCreateNdTransposeOpParam( + &bboxes_trans_param, bboxes_axis.data(), bboxes_axis.size())); + CNML_CALL(cnmlCreateNdTransposeProOp(&bboxes_trans_op, + bboxes_tensor->mlu_tensor(), + 
bboxes_trans_tensor->mlu_tensor(), + bboxes_trans_param)); + // =================== Bboxes Trans END ======================== + + // =================== Scores Trans ============================ + std::vector scores_axis = {0, 2, 1}; + cnmlBaseOp_t scores_trans_op{nullptr}; + cnmlNdTransposeOpParam_t scores_trans_param{nullptr}; + CNML_CALL(cnmlCreateNdTransposeOpParam( + &scores_trans_param, scores_axis.data(), scores_axis.size())); + CNML_CALL(cnmlCreateNdTransposeProOp(&scores_trans_op, + scores_tensor->mlu_tensor(), + scores_trans_tensor->mlu_tensor(), + scores_trans_param)); + // =================== Scores Trans END ======================== + multiclass_nms_param_t params_; + create_multiclass_nms_param(¶ms_, + score_threshold, + nms_top_k, + keep_top_k, + nms_threshold, + normalized, + nms_eta, + background_label, + batch_size, + class_num, + num_boxes, + box_size); + + cnmlBaseOp_t multiclass_nms_op; + create_multiclass_nms_op(&multiclass_nms_op, + params_, + bboxes_trans_tensor->mlu_tensor(), + scores_trans_tensor->mlu_tensor(), + out_trans_tensor->mlu_tensor(), + num_outs_tensor->mlu_tensor(), + workspace_tensor->mlu_tensor(), + float_precision); + + graph->BindConstRawData( + nms_workspace_name, workspace_cpu.data(), workspace_cpu.size(), true); + + // =================== Out Trans ============================ + std::vector out_axis = {0, 2, 1}; + cnmlBaseOp_t out_trans_op{nullptr}; + cnmlNdTransposeOpParam_t out_trans_param{nullptr}; + CNML_CALL(cnmlCreateNdTransposeOpParam( + &out_trans_param, out_axis.data(), out_axis.size())); + CNML_CALL(cnmlCreateNdTransposeProOp(&out_trans_op, + out_trans_tensor->mlu_tensor(), + out_tensor->mlu_tensor(), + out_trans_param)); + // =================== Out Trans END ======================== + + // =================== DEBUG ==================== + VLOG(6) << "bboxes_name: " << bboxes_name; + VLOG(6) << "scores_name: " << scores_name; + VLOG(6) << "out_name: " << out_name; + VLOG(6) << "background_label: " << 
background_label; + VLOG(6) << "keep_top_k: " << keep_top_k; + VLOG(6) << "nms_top_k: " << nms_top_k; + VLOG(6) << "score_threshold: " << score_threshold; + VLOG(6) << "nms_threshold: " << nms_threshold; + VLOG(6) << "nms_eta: " << nms_eta; + VLOG(6) << "normalized: " << normalized; + VLOG(6) << "bboxes_dims: " << bboxes_dims; + VLOG(6) << "scores_dims: " << scores_dims; + VLOG(6) << "out_dims: " << out_dims; + VLOG(6) << "out_dims: " << out->dims(); + VLOG(6) << "batch_size: " << batch_size; + VLOG(6) << "num_boxes : " << num_boxes; + VLOG(6) << "class_num: " << class_num; + // cnmlPrintTensor(bboxes_tensor->mlu_tensor(), CNML_TENSOR); + // cnmlPrintTensor(bboxes_trans_tensor->mlu_tensor(), CNML_TENSOR); + // cnmlPrintTensor(scores_tensor->mlu_tensor(), CNML_TENSOR); + // cnmlPrintTensor(scores_trans_tensor->mlu_tensor(), CNML_TENSOR); + // cnmlPrintTensor(out_tensor->mlu_tensor(), CNML_TENSOR); + // cnmlPrintTensor(out_trans_tensor->mlu_tensor(), CNML_TENSOR); + // cnmlPrintTensor(num_outs_tensor->mlu_tensor(), CNML_TENSOR); + // =================== DEBUG END ================ + graph->FuseOp(bboxes_trans_op); + graph->FuseOp(scores_trans_op); + graph->FuseOp(multiclass_nms_op); + graph->FuseOp(out_trans_op); + return SUCCESS; +} + +} // namespace mlu +} // namespace subgraph +} // namespace lite +} // namespace paddle + +REGISTER_SUBGRAPH_BRIDGE(multiclass_nms, + kMLU, + paddle::lite::subgraph::mlu::MulticlassNmsConverter); diff --git a/lite/kernels/mlu/bridges/multiclass_nms_api.cc b/lite/kernels/mlu/bridges/multiclass_nms_api.cc new file mode 100644 index 0000000000..d5de834d27 --- /dev/null +++ b/lite/kernels/mlu/bridges/multiclass_nms_api.cc @@ -0,0 +1,132 @@ +// Copyright (c) 2020 smarsu. All Rights Reserved. 
+ +#include "lite/kernels/mlu/bridges/multiclass_nms_api.h" +#include +#include +#include +#include +#include +#include + +extern "C" { +void multiclass_nms_paddle_entry(void *bboxes, + void *scores, + void *outs, + void *num_outs, + float score_threshold, + int nms_top_k, + int keep_top_k, + float nms_threshold, + bool normalized, + float nms_eta, + int background_label, + int batch_size, + int class_num, + int num_boxes, + int box_size, + void *work_space, + DataType data_type); +} // extern "C" + +void create_multiclass_nms_param(multiclass_nms_param_t *params_ptr, + float score_threshold, + int nms_top_k, + int keep_top_k, + float nms_threshold, + bool normalized, + float nms_eta, + int background_label, + int batch_size, + int class_num, + int num_boxes, + int box_size) { + multiclass_nms_param_t params = + (multiclass_nms_param_t)malloc(sizeof(struct multiclass_nms_param)); + params->score_threshold = score_threshold; + params->nms_top_k = nms_top_k; + params->keep_top_k = keep_top_k; + params->nms_threshold = nms_threshold; + params->normalized = normalized; + params->nms_eta = nms_eta; + params->background_label = background_label; + params->batch_size = batch_size; + params->class_num = class_num; + params->num_boxes = num_boxes; + params->box_size = box_size; + *params_ptr = params; + + return; +} + +void destory_multiclass_nms_param(multiclass_nms_param_t *params) { + if (*params != NULL) { + free(*params); + } +} + +int create_multiclass_nms_op(cnmlBaseOp_t *op_ptr, + multiclass_nms_param_t nms_param, + cnmlTensor_t bboxes, + cnmlTensor_t scores, + cnmlTensor_t outs, + cnmlTensor_t num_outs, + cnmlTensor_t workspace_tensor, + bool float_precision) { + DataType data_type = kFloat16; + if (float_precision) { + data_type = kFloat32; + } + + if (nms_param->keep_top_k == -1) { + nms_param->keep_top_k = nms_param->num_boxes; + } + + cnrtKernelParamsBuffer_t params; + cnrtGetKernelParamsBuffer(¶ms); + cnrtKernelParamsBufferMarkInput(params); + 
cnrtKernelParamsBufferMarkInput(params); + cnrtKernelParamsBufferMarkOutput(params); + cnrtKernelParamsBufferMarkOutput(params); + cnrtKernelParamsBufferAddParam( + params, &nms_param->score_threshold, sizeof(float)); + cnrtKernelParamsBufferAddParam(params, &nms_param->nms_top_k, sizeof(int)); + cnrtKernelParamsBufferAddParam(params, &nms_param->keep_top_k, sizeof(int)); + cnrtKernelParamsBufferAddParam( + params, &nms_param->nms_threshold, sizeof(float)); + cnrtKernelParamsBufferAddParam(params, &nms_param->normalized, sizeof(bool)); + cnrtKernelParamsBufferAddParam(params, &nms_param->nms_eta, sizeof(float)); + cnrtKernelParamsBufferAddParam( + params, &nms_param->background_label, sizeof(int)); + cnrtKernelParamsBufferAddParam(params, &nms_param->batch_size, sizeof(int)); + cnrtKernelParamsBufferAddParam(params, &nms_param->class_num, sizeof(int)); + cnrtKernelParamsBufferAddParam(params, &nms_param->num_boxes, sizeof(int)); + cnrtKernelParamsBufferAddParam(params, &nms_param->box_size, sizeof(int)); + // cnrtKernelParamsBufferAddParam( + // params, &nms_param->work_space, sizeof(void *)); + cnrtKernelParamsBufferMarkStatic(params); + cnrtKernelParamsBufferAddParam(params, &data_type, sizeof(DataType)); + + cnmlTensor_t input_tensors[2]; + input_tensors[0] = bboxes; + input_tensors[1] = scores; + cnmlTensor_t output_tensors[2]; + output_tensors[0] = outs; + output_tensors[1] = num_outs; + cnmlTensor_t static_tensors[1]; + static_tensors[0] = workspace_tensor; + + cnmlCreatePluginOp(op_ptr, + "multiclass_nms_paddle", + reinterpret_cast(multiclass_nms_paddle_entry), + params, + input_tensors, + 2, + output_tensors, + 2, + static_tensors, + 1); + + cnrtDestroyKernelParamsBuffer(params); + + return 0; +} diff --git a/lite/kernels/mlu/bridges/multiclass_nms_api.h b/lite/kernels/mlu/bridges/multiclass_nms_api.h new file mode 100644 index 0000000000..25447e99f5 --- /dev/null +++ b/lite/kernels/mlu/bridges/multiclass_nms_api.h @@ -0,0 +1,82 @@ +// Copyright (c) 2020 
smarsu. All Rights Reserved. + +#ifndef LITE_KERNELS_MLU_BRIDGES_MULTICLASS_NMS_API_H_ +#define LITE_KERNELS_MLU_BRIDGES_MULTICLASS_NMS_API_H_ + +// #define ALIGN_UP(a, b) (((a) + (b) - 1) / (b) * (b)) +// #define ALIGN_DN(a, b) ((a) / (b) * (b)) +// #define DIV_UP(a, b) (((a) + (b) - 1) / (b)) +// #define DIV_DN(a, b) ((a) / (b)) + +// #define MAX(a, b) ((a) >= (b) ? (a) : (b)) +// #define MIN(a, b) ((a) <= (b) ? (a) : (b)) +// #define ABS(a) (((a) > 0) ? (a) : (-(a))) + +// #define INIFITE 0x7F800000 +#include +#include + +enum DataType { + kInvalid, + kFloat32, + kFloat16, + kUint8, + kInt8, + kInt16, + kInt32, +}; + +enum TopkSplitStrategy { + kAuto, + kSplitN, + kSplitC, +}; + +enum ColorType { + kGray, + kRGB, + kBGR, + kRGBA, +}; + +struct multiclass_nms_param { + float score_threshold; + int nms_top_k; + int keep_top_k; + float nms_threshold; + bool normalized; + float nms_eta; + int background_label; + int batch_size; + int class_num; + int num_boxes; + int box_size; +}; + +typedef struct multiclass_nms_param *multiclass_nms_param_t; + +void create_multiclass_nms_param(multiclass_nms_param_t *params_ptr, + float score_threshold, + int nms_top_k, + int keep_top_k, + float nms_threshold, + bool normalized, + float nms_eta, + int background_label, + int batch_size, + int class_num, + int num_boxes, + int box_size); + +void destory_multiclass_nms_param(multiclass_nms_param_t *params); + +int create_multiclass_nms_op(cnmlBaseOp_t *op_ptr, + multiclass_nms_param_t nms_param, + cnmlTensor_t bboxes, + cnmlTensor_t scores, + cnmlTensor_t outs, + cnmlTensor_t num_outs, + cnmlTensor_t workspace_tensor, + bool float_precision); + +#endif // LITE_KERNELS_MLU_BRIDGES_MULTICLASS_NMS_API_H_ diff --git a/lite/kernels/mlu/bridges/multiclass_nms_impl.o b/lite/kernels/mlu/bridges/multiclass_nms_impl.o new file mode 100644 index 0000000000000000000000000000000000000000..5da75ef56a00d936d76abfd5f3caf7ae35f3594a GIT binary patch literal 400888 
zcmeFa3zQwjbtqb+842Nl8LoaKX(S!lQm*19fds}L8ObzQ$TG-y5CY3s5eJaX1WBME zfn>n2`6R|Kc`Mw9730`WCW%>>w;s7yH#RRyOyc8+Al5b4OY0hpZt#mwY_1o}#tzHo z7Yx#??%KP$YoF>q)m1&3Iq00VX1c2Dvp=X5 zH&4>(G2Hv)-~YaS%h%$UufMS%ek8v>3S^&fYFw56oiZ}ri1&@mHsVu9W*Bi@&@}*a zOx7s)aBP~YV9C!D(!^1WFH&*hZ8Y`gCx^g?fBig6HBe>r3u_$i#4?S68Bxkv~Qq?IJ|+K#zzf-Bl8QRyl7!mfSWPR=#mHZ@;zx%?v-{wzmx_U>QQ7bpL>tl|H`#rqYV6s(tfFRBKgo>{@DPnxcG0U#m|5Hqn7^X zz&}~=O;wAi5BiLsabaw~A>MviasH!P`TfWBcenPxCDie7&_)gTKn~KLmeSZLt0{P;=`)TK@(2p9SUh zUk2Yq{zCZ|Qa)IIF6lJ+$#7otXzBlktPrQB*+J55e0%QwBaVwbokKZ`_ewCC@ zGJ2JlC2L9PAt(L45$9{QbJTbfAp`15J;xuItZLl!VP?N1Nag&yH+}yN3O>ck+vRtd z;lC;ze^tY#iy3^v%l5S4|6Nwzj(=L4N*@ZxANl_=FZl1R4E|+a@RwHx|KncpmsSS< zlV0$bR0jVlFZheX;j0=pUF!wE77kx^sZG~;!5<8VkMwWzf{%v0Hr&b34PA~X`U$+%r__hn{Iy1-`I<=+ zZ~jWSIX>_GOSb>z*nV5E{-sM!nM|9p?LhX{(j0H1K_|Y?pH{#=e_Sz>jhL2KCS4d_y^V+{ZnkbGMP^UrHh3-4F8-tYs~tm*UWfM|JC)? zZdLUyMF8RaKU#|aKfFV&7py~p2#5cJQt*$RtKd(sjQ*EO!T;ga3jR=K^#4mK_zTWc z@CPcR|792Wsr+qI@<;n$;qBjafsfP*@z(i@{#s@9zg`Odyqgq!TEY&e{}-j;fAuN_ ze|cr}yRE+=IQWw}O5dGQ8T_9X(*M>Kir#fO@TLuL{fQZ^58L=-?f*c<8+9e`b$r3w z`|qz;_)-4irwx87{)^3X6}+uE_>_n5)c~N$qsrjh?a$gIHNrODQu=0< z2@fzT8Y@lvn%>)=Hu_&I9Dley{@N!MKJP^U;r*}N{?W7Z6@2n(3y1$gA^l(cfPy!g zL+@+DD&5X+{?^8SH~Ayi&Bl=OKmSS&J{+PMx4|!_pW6Rn29NYn^RiI--QWWneDw`V z-`vmeG3m^U+32_TzqAH5AAf|w)9X)nl$n3~!{5)~wwwAO8+0I48EKEHSF+U{_Xd%^*hXH zec0%CgD*tms<_J0J0DHfcNby(Bo9__EiY~G-ze0-q<^p~Sg>2s`*>yW-ROTR2i~#I zhUz~z_=j`gFBqf#kKp7(>A$CQ;Q!g3V@vLLFmG?@{M!va zf)_Ip-k=>H%9iuA^}kn(`X9{LuIM{r!qe;FwX;I)PecA;#!>~3_9SX&8uJU7f%=#1 z0XRdYKWXBp>EHM+Q$BYuG~>4{Gvm6THDKdkDg7mDRsCr*)*r4vi`S0`UP_LFzo|0# z<<{>ApN*BlFSmY4@P{jdUvB-D;IFR?{=XFUUkOFEH-e)$`~kiQ+P@PZJwQ9le|y?? 
z)!(bzzxkyT|Mw!lS$~IVdqee)+xiuPmk%WmyY2X-*{mU({wrQTB6w+a3jUJH;JeMA zIs85^_-QRNq^4jaA9ZT~w4|bcs51B$dcmibWb&Az**=8y|Be^@`zoXVYA^VISQ-4; zUhuzA8T?Op!GEwa`18Erf2}h3E4|>qSQ-3Fz2Lu88T^F_d|EFSTTOqS{^e$TfoV^R zt1dR|Yl)TBUO;T)hf03|-;~oIq;+KBZ(s$YaQ>m;QEr0or$21){l+h^uJUdXtizp{u`_OQE~Z9(t$3aidxEN zvgQV(lf)6ic&9u>oBdhggS;mq`BD4VFb-Gm=qoM%{a9GU^52=~cmEtoAbt=!{g?27jvc>_GynB?C(c&={b6Kbc>8~f%^#?39G;2( zIKuRYE@$+A{gwi{|MUCHtm7w@l24WX2)+rYpRXTeK(!AJeCOM{v;OPtOSI+~p46g0 z>oON(Uj$eZrbZZ2j|0$W#GUx3<*9L2==hP_{svYBkzt4VL#DW*&td(9F)&j_9L$wJ zmd}XNtV_Ps^G7%Tv9ub6|9*TCF26fi|8;7AjeaJS=u?%VSIFo%oh zG?#Z~{~_@$X#WY_f4+tSQGmZiWNiJ{Z~gv&7kmRHw7>k;4c2u2jz#ipBw*P#j3pQcdZ#ZX8ui#)BYWP{nyt1cKz{Q zgvi?$*TaDjsxK?!ze1≀q-CXVBjy4?hD!OT?Z2w5TDEFfgRnKm6=>8+^WgZNQUg z*yDclCmVdX{ddlfRrrF-N1Cr~2-}~}-|X~aZ7?{Z`2VsOd>j4Gd)c3U_OA`T-~LVH zML!l&59EI-`y1x1Q~ZC)3%-s1!(RMf;|1SFzuNyxQwrrzOut>xzpgU)mwVBV;9<^Y zg^w%#q)+_tx3wSlpGz@u4JkpS+{OGy{NX#8-D~_OPy#x3{caR#D5rmI?dRuz*XIQv z3#kX{A3y(}`zwR*wtoCwgvaG=)=u943>tGHQ*nDx-s0C8`#%{vRR5&&grbH1!u|_i z`jGOM@RJ{#{Qks6f9m~Gafv#Q^A)71T>r_iLGM4Tw!)|V-})u$zK=A%_#JY;2Xsvr znD>F`_b!3@i<9p;cm-R3D&)VcRi%$&{R#ao^UnxVGmn;6bn7Tzzs-~>4r2d5-^#zV zXo9a^?;YKv)~hr+x9l+PLrTjzgyMPNz#Dn%{|*%-S!y2RmoVh7h5t#t5g`D!4MM&C z%l99tlA<%ITWye6U+ag zkwGu{)751IHT(WiHj4~5SX_`qdg~vxMKw_fo{Evv@7T(7RUi`Q9e=q+%-T%d| zfc~Gz?^ZT`5!DXQD)h%S0s3QDoYt88dXD~Zz(3A1^`)Hh(eI}EC%>*%{ga=!C8gW{ zs~P_*?Z4LX52)?xHzo3_`(NXl(EfKv#r=;kb$AZLY0bHL^6yYS&N4Nx|8@J1!NGy_?0<9~yNUM!==c`%eB-US|3&LBZu>3^t=QZm{LAmh{3#o@PU`OoxUzGD0eve{#4!r&r{{<@9_KEO6QnY^r zmtUgJn^A4dUrqbdQ98+IyZDQkp&b8g`n%Nqc@nD9AJ10?zuf&<1b?(L_l6`g10uC1~S6_rJ3gYx9a`(kh$6FD+t^ zlL{}_f74oJ2=$Mf|Hzke#+x_K2suA3zd|_S?hi0{tQg6{+^URu-o+a>M!a4G!M_uzm64#)9*L`YE%Z_&wqVQW$^vhU!uz3 z`>lWY%|C7O@8`eoH~+T5x1S$D_R5ES{PpucPHUAR9DhsQp9RxuA^zJo`u*(3B`5g$ zFYQ-^>pwsHc{7SqIDEhM-&7fVKmEV4GWdS;$KlH0`;9;AD}(Q(f8jm${toj8O?v{- zYV$aP*FWv<*!{Q0Q6$l%A7(}B{%Uf)Y_2x_r<<#l|1{ly)m)YMb8CMXae=x|Dz}>OYS46MkaDKI8k01h&M}W&Q(HUvp6L!%u%wEy)wd z&rh)RGb2yi$hj^4=fy=Ll{`g^Vuujh+5D-vyd~A*KN?ZxGW+3UQ!Ryr$n#%M+8=b0 
z4W_?-^L*uhMRV5X>ck&fXD=C8GJ0||$A7}PCMG{rF6yac5T?S*e{__-cuMKXvj>Ej zmSL8BX_fy>A(KDIk4$>0U_bQc(J$K&I~lxkTCN8+2ra z1LzN=Uz7|y7jphU!LsP@dMg$UdId*6Ou^(C;QuenpL+;u{sGF_8U5#0{Jtt-UH19h zq0JrMe}+4v|7^yF!S!aKc52QG=|9csL%rV}=l;X@Ux>2K=s#}^g^oWHt^e5P57a7h zCY;>AP4Ss$@$c`N={!HiVROj(2LYF}=ZWXP;zYj~yY&w@_-sC?Ia(r+Gyigf&+O?E zV0Bi6&+S(?_{<*Bm6izPjQ?)%nLaN8R%b={N`Jyz*rC+-5#*YWibr@pi{b*mze)Qi zuuJK$cd`CR>GHl5b^Z(MtK<1Gs*lc_X{^cw`ge88`elYH)xQ64i2s$YZTSTBzq8A4 z4RNOZ?l&=Bk9^0!9wooQ`N<)5D)$jh$Yj~I_6;<ZQpKtlJi(^rhZBbzP$-KNtKexZGyl9_s z-_<4-FNJ%Y`QCEyH!*pibAgFZY4~uE%|Gq*laO$I#qn{550g=UUFNC3`s4DW@?xSy ze^7^tRC|ENgY^%NFK%rYw>SC^r-;;H|Zn(_o01<{03db^k3GmO9U+V z=alZfH%(|DLSlT*ZxAB=k>TG>ubrN?I9D_jr2k>-fuQo@_mc8(Dq>9g#i!`_zDe=; zr<9cp;C?ctOMMfkXMTzNz#cXqpbRsaynKo2bIRxTTov2@cbV|rN~XDB4oOn_u1tuR zxA&j4Ml-#i@q_vs&wpzUKldz4?2|NmsZi(hZ(79t7ft@=@Duju@Us;AM120Kv*d5) zQsv(`goM(3%#H4h{{04(_z|;TocK?LML&MeK)+z?gL&=udzG{wNd4Ox-@oPjnTqs> zx1ZnnkslyJ;qYJglAj}$!S}nr;_1rZ`<=f&Tp4`7{`*vA@Q?bmKm2J(|L;fSMZA7? z_uUi7et^MYV?2Wy%s%MX<6RUbRIGZKegE1`|NcP;|A>78y82T%=}-7SbND?Q&tqzq zx&?3lhUqR1D z>(>UIf2cKj1MhzoqDFILjbjp!{s))PGBk1cBy|Rn*B_;wR=LGLGtfKEC15chCY-0Gsow7H}L%jBfqT19{&@2 z`=nr}z zek>2~u=-aT^`+Ic4I%!czWaArzHx1q{))L97WiPie=wM$RT#?R|0Lnh*Ix>2$R;BGmpb8}vt9*%8jL?*zsjOm z7)Jcv?hngZ?}Palhw`7V{}tk#O+@^E*NOjWK|{XaPlNIIyT48vZc&0$ynf>&zk2`E z2|nD3;?B)$r;OC4-X}0zugzqF&Qa^rfXOZ6?Eu;Xl%A)r zbJF4R>l8g{dQr%Rf2sRJ;TgOyYa@aQAHPc7p9(kMs_5NR8T?ZB$HFV?75vSW!7q1z z?(9Le-{RMQZ0%o4f56<^mA{wY_+^9dG=9qaO^@9fv%ERH_@sEf(9eFc!7pb&^$)21 z_vN_$7B2rz^rN)J4>i8kDueH*zXvOW?>Byn%HaQ^jQy(l|0cFbc>Dj<3%=j{$tM4P z_t$T&jDDy61B4bol>85P!Ov8q*Ix_g&l(<55eW@D%-&>jf;p4v}>vXogPUnXy`^-Uw|Ks=~Tz{1tKdJwYxBNcoML%&d zdE)xpC%p7Goo6k_e;fS5@BhgH_WAHBUh->$f3p|;{guJz_h;DYO)*#L(<_7Tw|_=X z&*AOww|}*$GWyT=YX7B`!7nxc*Zsjyf2UF;L)iaIt$&K<3UyzHpZ@c=ADW0dm$zvx z=N}Ckiy}q)5Adu?<@-PIE!qj!y`_KM*@2!E@99bo}H*vtP*`81mA;Hu-a! 
zKf)UbkMSF3dFh`us9lo#118e>NhySG@>ly0D6dGrK3}!LJbwo2`ztat>W}S-{?O;M z8kef`XAxE>_j8l`k?8za{jU;xA27LP{h9WoDIM|G_1Bk~=fCQ|Yo7n2^t60&C`yv+ zlSLB+Lfk^85EgXeA7EzDSQd7E?vn?v57S>lp1-2OoRAU`5hveNd;YAx%shVvXM~+U z+tmC0G5h{bguN8LB&CWryfu$h07TIFv*pvoBi8d%QxG=OH)7V1asCYUtqeJTHZ%eK zPZ@)AqY$S^{vELLFD+*FN3O45Yq@{h%%7(ZPs(YZ6s}7Mq+0AO|6k&V-}r0rL~(FL z`M=P3BG2n2`afEv@{V4i)_ZjN78R$OZdu`X{_HjTcuv*2J$G)A{bxeme<1O8li&ZZ z3i+*_8|#mWXukk8Ao+)(I0-&qnbop?O(oktQGPG?T0g+rVa8deJhz3aGoS$1<@qD| z?#DUpIcIo@abEfFHawa=uMD$>?`QX4XaEj@;aL91|NgKR`y~hU-ddVK7g&i>-=Brc z(f?vP*2Lv01<*Ex@>9726-d4XrpAEzy}mJB36XP1#*pzo{u>3{F}7hw-`HJR8p+eB|!{wg2~7`)4q0 z3EK1r_g`-*RQv)Cu=NAgzZNO(@bzyoy+-<1EwMEI+*kj`Xw@c${a|p&)GwrOelv$3 zvufsgv{6;t$mZC79)J7Ez|&5Dnk7kxJbtA4Ttd((^WR*0|9Rn~$$Zs?+SWXA`%eJ- z*+9&x{dI{}`7bU%b4blM{Oo6&{!{*kdG^iB<>q?Xyv^o(=a0|k>u1IIlRh8@%r7;6 zvf*#qAN|{n`>PQ?3r9Z}cWUW&&* zps2jhF8*Ex{ovOt&N2G;{Ov==eEN%LnCZlCsvsv0E9~u&7IO+s#QVdF`_C;ql)m)y zU$ONcuk&m0zDoEHoict&X;Sm$`m#qwEZ^hBe`vcz zjrjRbQcbfr@%f7z{(k;vHvakf-}>>-2H&s!{QUoH@cq`m2E63Q2H)@ev7i5~4gMP? 
z);~yU%_k1!_|i#!fjn^d{<>LnyBKuP_@B@M)A0QI<477`k1M9w)_(T>yC|oA^54f~ zn6XK&pA`emOGW+XxBqs${f9rCV{gu7_tWPQYrcQ%tz)OfZ#Kp2M-y@XHG{6TKLYLN zi*o1+iKcV^8a{s-Zf5~Q9%#kydzn!1_+xmYq z(ck3%C;jQN_p^(cx2XP|rYHB&12s{KNu9 z5BVcDznsvMrr*KwCsZAt^{4vIJ$Nfye|IQnW~q6Me_~aZe=YkLkqIG=wYkhMu6q7Nf+4r9HG;#XnaLH5)BYdU58f*<}| zo!@WFN&Hu}hith0q~@>G2I#M(?+a)Q%t-DFpvIZO{FNGyEH>6BseQD3+Wo0@^G+L3 z`c%KjX>XqXi7MJ(N?(WWe~HJDAyu_M=P!$EeOJFwtxx;GPivJS)q*2x|2vfzHNKI( zP+Ob811i}VFvs)SEr~xpCAWR{B{^zS#0w{tkU!2K&4Iz7zKE7X34N zkvhcKctq)g6#wGTy-9Mm|6iZiiTWN%9}^D^0(!jQvfu;B$ENoe$5;#s73Egb|g9X|Vl32$%C8TE6ML zm!6MaSZ@4EaflBDHQpim|Eml9v>57-zeNEK8^7ZH_s;MiB7M<3Wa^9eooU81jA|dO zFP(Dk!NBvou@hR${3&XM-SR4L{%h4gfXyw$+C*RJ{!=?6p#LQE#|qBBwa}lCpUxk1 z`d*w19OsX<2h@1ceAaC5D}vhpkxpsd0Vzd+5f4!F83>`>~WBZRjXK(-S zciX?j`W0jU@#_CK&}hlv%cyhzUvzp>xcPOn|KDA2 z{eOR727fci+ke*zY|pPNdyVXODn<4-t*5unAME2N=}TCV@Jo-Ma=k31|492YZ~Tl- zkEC^_@P_x?mD2}6h$*IJYQ>+(mrvFH^n&<7@lwzGWBd6?p8cQ2fBoG#Z3uS!;a<}| zmD$v9^a{g2Qq13I`a@Dax97%q{bhprzZYYUGr)B_;%v!{uEJ^kiev=mdq7Zv+J8XQ zH|5GNK`4AK*?;m+^W^8!I?f-Z%sx%lE~`z>AK@gTO{Ll8pYl{qe?3?J8K(B=aic#4 zUt#|r8oi-DJ5PV55DP<7bKC!F(tq&R`5qdw{mGRM!PwhB#i2s_6aAM*zpMU+Q%N=z zj>jASQ2jlQ_>YPFmE`}KdFyvkD!+N!OEx$NahgZ_i&ZGlwG}@8Cs=hClYIX? 
zigUtW|K8KTpVJ3J`yZTnY6<%ZN52@U{qXeK_$h$-?>V)f7)r`Y29)BT0slSc2f%-C zIQ;b&1i)V&4*!M^1;Ae#4*&i&0q~c2!B-qlitfh>gwy?PlvzHNd@jN_<>b!>-|znQ zT4nJ4?%$^+zi|4^^J~OkBu_r<`as`5UaJ4w_^Oz zph{P(VN(C0e@2>K6w2toN8r^DCt11vEh=wMn`FGU;t&6DvdTpJ6%m%HEe;v~U`nU# zU!*Kj{7LCg);sd|msTYEOM1Pcu~f~UYird7ij4;Y*1r%bv;9&`Z6PH=^8Ug)Sv^ko z7XsBas_dKmILZ~rAY@+&v7@@l-)TJ(`xZz>@DjbndHt^evP%=OSbdxy%C{E>ehw3JVC+CqKFOPDNx zI21YQ&xdp5rLjEZ`}NB^#{b}PWM=IgHC_{MtMC7I`6GDL{uIaYl%knuX8u1FUj1DD z{j)CXZ$K5}c&X#n(@&7vK9xj{@b3>$xeWcUo+Lp2*?u5^u4b|&w|lpwellM_tg8L& zpj+*q^-ldatxe&_&O@ZhIu!NPY5aWkbCYxUXBj_r1ulVt^{ckUvHh2^^&={mY5!vR zpG=yNN|t!m?(2PHb`MR4SwU$(CF4+hej@J-1qe|KfG^HHG#36#bw-JdHs}>hjVPEr z4}cm|=%thVo9Bn9Xt^i0|2)?IZt%aUO1IokRt^P_J)|@I=Tf>9f7p1ICcB4_|6KpN z@!vQ<<_mP)b|sCY9e{0c&TRx@*nM2 z@j?~yCnOs?O#jZp?D*B?Lev7f}o%IJU73;u9r@L%?VzrHf~zx0A%s|-HB|JvT) z5uALebXraemp}XcM;soHKaMZT!B1|J?tImuVOOD|Y{p8~t7RSA1pb z6YZGI(xKz#-!y)ArIs3uZfBohUi%^on;rq@1$8P*#U8Epdi_@Z^yse>vl4(-o z>H|UL#obAHIF+hgnD|S;DLTGyQtYpQ%XLDq^yXlaPPygNu77F0r;h9ISOBKYE9bv9 zq6*?8iLm`othw!@N0BtNfXzSJ7Q_sR2j-}}(Zc8c2GCFzmZN{FvfDHCUr~yFSw8+{ z%5(oYl_BpxmZ#~b%*M;x+dr*OrIR;V+4XM;Kek*l_+L_r{uB=k{l({l`cGqFSb5RQ z%NO%+=E8m1``Tcd8E+x4-HXOurQl0=ro6s7XyDThTx&Y~nUnoSjohL?7(T54a^ApX z?En1xg*o|KUiZi4M}_!G(*JdbQ0)Pfoym6#|5$sl zf85}|h!9zOIO8ES|GEBkgO7k(`00)S=6qt9$zSOjEcd_ETPtbME0_|gAU4~6=%b|M zX{Vo@w2U#?>Aw$a9&nXvWNU?onA+vwn_)j%WU+(w)hn|r3Bj5r$cK%5Q zOu6>U^gq4-%hylb+E3f#L*{s{S(_6;yb1j{7Y)rhCGuXQ{k`*CCVzSP`TL|3j|%Zv zkp10UU5@#r56fr9Z(iQ1{XyNPBi;H|mZqQC-0@@YWo0D!V3#c53f89_1wEeQmQ~pyl zeeqXh?>(=S{=$JjGyeP9Ph|f*8D#%N7nS3mv_F{r!}V9nSX&6~mkj+|X(+K}Z+VC1 zGxiTJ@1(zACf4j>|1tZA+g~jx7rub6@$tVfdrM-d{nO~>@C*3|AHT@7H+Z}S!8R|- z+CLvRjp2I-`ebcX-nJqiO#{s(pYe;PwxL*V3LGzcz8n z|EldD!o1Cs_#bG$Fn+WFYDs=#YiHXpUf+*1_z-Bn@b70Dbvs2&`@`&CdGbpGP`ULN z104l+WF;H_-QZJAUu5$+dSBw{YriNEdoT9FU)x{n%h?YWx_t3h{D1Ic41WuR%qwhn zYW_H-l>L!bMfTU>Ao~k8l;fYYf0+Hn?Vl7wTS%6Ve;N8mO3{zyGxirR?lv{s-FKsaGFCKTnIhzOW<@gu3-)i?$0Mi{NyKvV6k87X)1*p6o9bxqjibC1FB67kJI|mleB)s 
z&tGZGoC7}g7gZj-ekhMHn((RSc2A_gYyWtTpI!S$Ntl=Y0}D|+yY^2h`=e#N?AkwF z`zOr*t!w|Ze!r~VI(RndRoDI@PSDe|Kg!KNy7rH^{ifL7UH>U%zfF+!A6Sf?r)&Ro z?VmnUq-CDn^`Dqq-SwaD`j2J(DyJi5{fD~iKZ;4+^`BDqLn>L_^&iCxzxAKrLPmD& zpRWDWwSQ8r(q_L)W;Ev4UHhkN|8(u2Z1-^v)BcyA|DyK)QTv(BP%RmK_wUsD&;5M= z*#aT+3LRfE{x6~aKTieu|9Fi5G2I#b%lRL-uK$$tpTY9v?jTX&hHb+A zBk**H$%VY`aMEu1o~e$0?(g8IsYH=nM(}+AJ}8YbNQB+&?tBQOu{q$wf;Tu z{F{Jp`T1|?^&j5^ZWibQ4w();o z9qRv6PJa+xssE4P_<{Ab@I%+{8hGN{s5Y^Focwk4AD` z{7kkU;w-D;?+@Ko#{a9eVt#*eYmVYKyu4fgCHp7*{F&eP&;HonUd5!gq=M+Q@9)t3 ziVUD|{`;N(dkJ|N4&U$ouH!C$r~nwfuxs8!!n6OXX~&4$_t#R&qu=lQVcqdpH9)xj?~cEHD9h^MW`A|ZUmy817>(nf-~6{b z{wiL2-CwSo0Xl8{MbAI|)*o!+FaQ3JW{zXPZ~pis)*i0E{l1^Hxia|O@2~odAH$XL z-_L$pUm5()v>&?jS0w=D^bZP+dUW#qRrrk`Hu|0RPk>6+m#OtiKl_ntNuJ36)$IN{ zyZ!pSeZ3m?tWf)FM$PRawD={K%l)%SowR=jGd3&!`>mhY><7Q~TVydz+Zeijy_fl~ z+1o#DQkCv!KilX(p6hou_@&k_VHSR`#83aHT$G_8{YPCWT>oC})c;e=Rr~v`pQp8? z^k3)&-*5lG2LDrD@FT1$oc}w#;IF9+zTf_>-~OGA{~z_DzaRH=Pq6u`n1%kTFM74V zP5*`7zvnl9eZDgM-}I87(aPXAz2HAs8T@kd=l;RkDY3u97b=7Aw|?NKzijgBXFo-i z(cj(w?n7RxhnxNAXFu86|9IMu2BF>c=a?Oy-)|g5R)*V8=X>ej|81Yo8V%e&0-d)0 zElE=Svts_k|BXt3+heyZn=vKhUkB69T0B|C;n=ZbcP9M(FG;*TtF1ZW@27Uw`oXlN z>rRdN0Z31{{1?xk-gj2AK0x3^bKQ)SWBwy}QD3nmjtBjeBBUzZ^#69u9!@`>KM)>n z&j$_$p5J?7)7^d4nKa0{4(UKchcGY&p9cWD4$t5mm;8rcFnxwm5nYtDw@BlQ-x0U} z0<68c&d2}t2UvVv?El_T-!GP=C;hK^6dABi*~2Nbo#Yot=A#V>Khxr6LyKORO=lBa ziXz+G=6L0*Usm5>O6U^9Tj&J=NDIP^hrs0Q^^-L<@^K==1(|QyA3X?c$4*LEM(LV=Z32-hb&`02W^K2y`B$3?T9YmJ|3p-|%-!EZYvpFqy!8XS{in-b zyLf6m-yxk>yY``w^`Ba!_oY(vSB*y+s1Yvdt2h+(M3?|mCke1KL4bf68D^PPw2$?l zFPgUdZ(y2T-@xG3{$%}ts{YESm&NOup1-Y9|4g*?OIK1L&M5i!yp4aEN(_0F{G;(Z z$pVjkn$JIs{4$d@4%6-JlY#3J1Sw_{jsI49GsLKLKl?LNjrKo6|CdOmHLg_pS9YxA z*HrUm$tM*by~1p-IhQSq^-1REgg^9E(SG}{=-BalP8IaWjntLjc>hl_Lj9tP5_?9+ zVFTI=^D)DwCE57`47RM_Ims_pCd3Eg{?M|1qm#`*Z!L{KO{12p|3nleg*Te>oj&(E(o(GN!}ng3rL_uo$P|CLRU@CO!l`)?wS;rn-P{+Hj#;rGCD!=6ZRYWq*G z-%Y&rj~shPlSKYoTk%ae|1Zz_Prw{=y#)r$?*qdOc3)V(5C1haDu8A_zP0P0B{}Us 
z+gvYVHRna&M*EkHewq){d{?mb0C6qb|Gh?ym+5?XM>1cIkaaX4Zyq~6dDCV-p2u+e z`?wztzC1?rN1=PU6}TGydO(Oi8VOZCyZohfDE&|I=9aMak8d6vi|>bL*AHGgW}iCe ztsf9&ZqIKX+-~uI$ilC7{i3KHBpGR^Cm1-Ts?J_35o3z1)F&J2~`?;W~@?Ozk7a{Z0u_ar#> zIlB@OAYf)sJN`-D#XK~AcK#Vz93z(X(|T*s2EDTHpOZgObdg&BXuPEmgbixFgW`=V zRh;tUeWFykd|LY-EVlPId7jY2ekEx8&zflcue7~L_AHT^_n_{J zNfB%NiPiS@6SKYcpWa6HC!>E}tWPB$$^In&nlx_OpJJP7e@gvm*q<_<><5wkS^qav zzoI^MZ|rQ&#QuDBjK=@tV}Fu;pCzkI_+W?Ip9hb#{VC^9#{O9pwtv!+W{ zuAd6#k5t+o&LHj_bg*AxLr#0Q`cJ!neH+=YrvEt1i}kT&0oki{)Yoa;v|pneP5V{q zSHpglal?Kcea+O*sE=Kk-kK+|U*G6x`<3Pgt;D&#`Cf)uN3SMkGJP5nsnN_=hH zX4*>vIQpA6NUeexqr>j@}q%zaHrwtM&im zWxu}YW52>JIr=n}%z}`%U$;+q`*jzyU+cFq`}Oup?N?ZEr>P%FUWuE{J5BpFNPeL{ zD^z}eb?A<+{VE&B)qah(Rbs!wr$YK42~!8#uX~Y(aQpRljP=)f%pMwh|5bi}GBu9- z(_#Hq+>s+Mvv;51_AjG&f12puSJzru{p5n`!?_{cPC3GH%$v&10s1M}6+z z*x8(k{d=^d?O&Q-WXUQM{#l3Hzdt)D{pVWz-^R}Wb##wu|2Fo7*}pIJj@ACb@v?tk z_OgHfFh`#{$*#74cTafx_v_659lev;zdI|nf5nzvrhX)OCBDKg)BX*TUl^SgD!;!z zlsA4yUH@0vXx#s`eq%NE?`Jd+9BvPf&-pEQz~0_4 zV`qr{8sGog;rCCP^p{@0h3@)o(D^OcWsHZ^ZJeJ!KDae!{lvKn%lU<@{W!Mgw|;z_ z?N``k+OMO#!tB>(@9SeUe>z_F>q|cNtN07MK6Mj1ZNKiE==u{LVD>AF@%rtb-C^3V zB(KESmi5~Z`30C2D!<+JTXp{_t>4y{sr4E{W_aWWS346w?1ln0B5$(fgmo zRQX#pbLb7be&ya#l>d%B@0sS92eth(&ZeG-{UHzpv z63~9Azs>#gy#C7hKi>N5#gP81=fCmyr#o}~aGdNOWC9e!yk()qIk+e9O^)uf|(^ zgM#J0A385m_@9Es!{Pr+mjANWLi$_M=d3dL*^-`D-%FL@@Av)B$18*H_x(wz4E~S2 z+Rsma+vJyjKiExv{Kjt^eAoL^k>M!cIrA~fl=HN~|A`m>M=ERoQs1waP*wYEtqi`` z{jtJt{IK!=dB*>orYm@@3?a&3m*4-Zd{z=&%1aI1avd1X?MRh-WdcWL4bAN3Q z-e;!kd(f;euf3^$p8hu<;N{btZA0~?Q@(ys-v80c|JI@re?=VT-#_E|3gY$c623gY zId1U(P{Nn~_u~ft5edIOj_{wF{PlxdDu(aY|LMNl#*p&&Es&D@q<9`V_ye~8>;zws z{t+t|hkSo!XyaIX_CQ3C*ObdOnI<2EziROgK{xLNrR|2;-h{>XkI_;kOXN-g=nbHD#6 zUS<3!NjYYRj@>`&&c9U?hOeJ==ik_ss81y4Z!jVsR(&M8^Y5zU&u{)sCTF<(ms`Ii zc^L2Y%dY;fO8&a*uRi+kc2tnz{jacjsNef{-B)&DaG9!|dh_PkgB!$RtT`{z#n zdq5?dHMO26=hn%uT>qz9k|)An$=1J>z4ZcS<@GF}Gl+fx9XEenFr?N$-iyu5 z<-54`?@g2S{`+^C{H0mkLN~GXCsST$<9g3>tc>9c14|V>Zm?|l^ZsYTCwrUfpF5!7 
zRk#0kl<}_^KH;;-&JRK}=V_z=4KMh$%HY551)o$dlZPC->9vjiSH0j5R7U^HUht<^ z2LB~5`0>(x{0ry*FTCJaH~;mU|4&uqS3tkt{G+cj_+I`OVdEzWk4VlR-xc3C zWJ&qe5m^4-|5`YHk-iq9aaDrf@>*g)`;8w4g=qh+zNdBn)5w=bdM`-Bj}h07u>O~3 zaSHW*9=Eofa<^t$6jnZ3lavRlxPGCzANKvoh{J;`_9gcbQGOPe=?`VoVc+>iT+BS{ zp-EC+_5Pco$H*L!l3{KkF0j`(@7b6aQP!?>O-zS^8RPQU=f@B zXZrtLcB|3fVeV$bKM2k_BS{CUIKuDO5`StN)cOR)<#}4eKVkM7v%VRo|Fu|v_YYJ3 zw<>?6#9bhM*Sy}S4_;Ys&VN?p{W4U3&UI$`KVM_U{|>*$NcrEw`xz7T{@c8c@%qd4)SajwGOZ~U{hUpfElkDsUL---x_+pp#9_t$<$!QWmPd_VekRtDdX z{@s3v@C(^_Q+ z$1k_@SEPTUbJciJ-TqNI`6K%M)-P=IOZ(q=ZRa;;foE5$@ohaaBwYT>?O%T4-3opk z5ekR@rW5@&G?=2>6nsDX$tM5h)(;n;zwx@t=(n%mX#eD<8gJGxc+?-rKN^$UJZ$ur zJ3lh*lM0`R;nM~%CfnebTfg}p+Ec44gI~OUvTKf_cQgmy*Ok9IrnY<7=y#Jp@|SJQ z-Iv&-U&+Cz7y}#pV){wnDP=8d|Cr*3-~7P_pPyf`>p#E^gy>?`{~tci^gqJj9X)pJ z&iGYyN$C8=jsN=@{635~g5}Q+{|%z=B1Io<7m>bylhTh9{dC%46zP48@dKl69{%!2 z70+9z=6?q&gKy_Q!K41i;}2E_znuOhd=`1ZPpOb0wEx`hZ)vo^Q|qzyD;^a=>p#); zj_bdKyQ!Z(@NuONDYf`%lRr29Ki)#WdU5Mt z+72IP4IAr0m@DsLYkxQRRO3v9_mmwU%9iuA!8he^=5j^PJFtf4S!%qc3TLAJ)8nz3 z4zJ*TKjw#h%Z&a3UtXc&WKYz#80%rOnAyMb;+qiwjYouE@zX~CQK$JwTCc)yQ)Te| z_U|`V2H$W0ez-FDe*5?9D}(<_ul66T48GU;16)zX{*!=?^M9H)tn|mm%GN*J*57j0 z8(~^x=r@##yoXKy_{o2D>#u(Dzoatx|GroM>+^zdv`eY=$F!u<|BEVv?`HpU_(PS! 
z_q)H2B)Q!9n^GY|$bY;4Zkp3>58PK7{R_S1?++`3{~0g%_g4nr{Qhm8y~W$(3zfnD zgcto^s|>!E|FvjV2ETa!Jg+^TuMEDE|2{$!Vqlq?5BT*z(i*b5Z%~!?{RdkA7B%yJ zt$uc2?2+F#=O=mmB50KU0=_7xzijyX`9J=kGWg}@k3>IF5>7wgKTJ988lt(N%5T;8 za8-`I`_+|;d;QmchGPGr3Fg0OrC-!@=_0PWyy z9-xpaVTbog*8VlD9#-z?D=q*1SXjjJe|Ja7KVeMmb3Nlnn(h*sikbGt^ml^)AZV&^ z{2QHwjmXAF;*ZX+*3)$P7WrQ}KKK7mQ!`tRML?eBg663+kl{7c~}{~PVE-%>!gzwr6~mEili zRBeeT;?L)=C2El7^oPE^ne9K7Dj#Pap48HR>hd*XUj*`9{DY}c@}z(K{y6LSLz(3> zN!RkzI4e|sj)dOdHLMzfP;|my?8oc{_Ag@~)sN3y`D6L=qbNVc-!DXdXKGRO(e$YR z|4bQc5aIten}1vL=Di&F%WvNhF26?YZteFqER)b*sr(zrun5hX$Dse(+MmyV-Q<^z$40~EM{Mxz>nCX)c6<(ChH(AC z$3Htfj?ZJ2!OyiHQjqzffnDDo9vA&(&_AyF%TIo21{E$py#3td=UX}Ue121BQnY5M zb;|sYkOw+%~`opfj-ir`<`_lR=&AIsbC_>cXiQ`u({Q=LQKS>^b#c+4g%9zErOVfLe){2WlOI3-HNW<=!RP)X)DM)9A{Xly&@|^IFk1 zb>Bi7m)~2Fu(kbsqe{;(*gaTZdeUT9X{n0Z2j9PJMc||93`1)~c z(VREf|94pVmzGKB(d$+6`}4_uEumAtQ+@9|&0iuE&jSaJhOR%$No4lt#(zWpTKJ#T z7g4Q%x`*P)QwdNHwCsNwxmd*3f86{(iNvgf0@(%(WOzZh*bHO-A7%m_@) zo9dtZ8d~*Fe%_XpZvU@l{I9hCTE{=2wu^Qp@~Zn^bG zb8epeQ>Hf0sd@dc+kXtsHH?^ykVQ8E#?P_uKN;fHe1cv?A^JZVzvTBDsj{+9YyYwK z|4hvafAU{#tP1G=$@ujzIpa?TWKpm?epO<>(s}Apyx&I0_n7AkKZE;UwEjZ=oc{?Y zL3AG}rOWTJCikIIefoQQ%T=S&_xOsMI5(~T>`vQ1m87WtbqW$yZvE8Ygi@d*<<8F# z{Qu6{%c$8o0v)@4{XEiGj{mm#i{Jj;Snr>wO=#EWC;I-eG5<}O)h1+Xf3Nwg81MaC zvKhkV-)a3j!`F$S|5hPJ<>=3lK?dxIpZ{!B8T@kl=cNBfD}(Q}evZ)Mhid-^(1DBW zg-nL_!Fb&u`3-j5_}{--otLX41>y2n?7wsb&@MdvRNx?F;+bN>OI)GmPbN1^`& zymGm+A8Ear`D zLS^vF-9JkDt6mv=yZ<9rFCX^)kk_Bna<-67{>R0CMxPEU*MAL~Q2*HXztfxv|6rqi zyn(?pL(Z?p-=CRi=kHR=9YbvYxVRKb(7Izf+oxU559{10&v321Ju>+Uf_nUysCUNs zSt14o&Q$&{u z@3;PDgYUQg(LP>Hp!%;QQ&{^_9VQn!mxn<@C4t1LpXSXtjB~c$&(vv(xM^iMG;@_2l+^ z8D;d(#`}%6zwAZKfA*rpe^&eRHr6HnyxJdv?&AT)AbCHJ+W#6oc6#y#{-?PfBGZlU zdl7zETt3wNh2Qr>_`#L?SoyT1W2lt<-%=+$&-R~-#~-4P?A69VIsZd70?GS}_EQZv zvH3>}hJ`QAZt-6(%x+7x?7v2YoXmgt7*2Iq**yPc#XqV5u2k|yg`*A1UrY`DLPQrO z{;-r>_PJ#L$uETMzsoCK2p1E0)Kiz}#qVG$y!=N;>5F^u8~u~_=ZI-Ut9t7sKeCwm 
z5kDwi^4UMyn@7KFL-fCeS5Irb{|5nq5VK7DT`@?c;_%U)!>qg${bC-9PuTjoytK9$9f4!{bv}#!uIoe{Xix}_ewc#EGr)gMmOuNT^xtaXf5`Y@AmL;6p1`cUt8$GQJ-|4onnLq&X^(M^{2H!B*;@$1)H zL;Ih^-$b`;Y@ElhEf)Syy#BWtxr*!6KunJ2g!I40^yR%zjdTCw`zM;Rju^k5TNm1Y zwob7AW1oLetH_zOalZMF&*I-dHPd;1jKjM^)=xAtAPu;Q=KtYDzo=mSL~$V=KQuo| z1#;G3-0)}f(NcghbI$zF4L-By1K@M}*$qCkhXUYp`@;=B8xI2DEBy;^sRgCxol2ik zys^@ZH&?4T?cYS(RQv_DzeVY=3(p^6{dGLQMD>rd^G&4)>EG5V>!%sAlJP)^zSj_4 zRGR)+(Pi-8S_B_vU9c~{ZwO?WlH zN8>H}QJEC11KNwAA~=p^Y+ZK*D1Svn*90XMKel$e@p6*xc?ElGFaIE zTHq1<7iCKPt8XgT|5YjZpZLF@j~^y|1W%V^^mBfu`5Z$}j4=6gESF|0yM8hyQf1L% z+Yjda!A?JYWCh1-(BOxC{Fij;IAFowNKYfZ_om?|mAxti_kT^ZxQ0|mq}+D;DV57Z z|2qHguhrs8qoE-EFFti3s670BQXWo40;6roel?t;|Ajqv zewO54eEz7j2D&b976R;^D#HVGy3-%RKo52-VkQz@cPjoo@y(8+Tj0)*-uXG zFD|a6{Xy#Q&X|9vd{z88j4yb5r1@+i8~@+%;{Q{X!S}oW;K|D1`<-8YyfXN1{V#1z z)&7mj;J@zG{#!%(Uq4Y3uYXOb?{~sHe%_Cb@BK7h!JXG7R7Bi&D^4ZoCe3gj`9Ch=qCSBJ|lc((hu`*SM~*+j~Lx!p5GYV zW1ioDJ?8n1=AG($%7E3&`@WO)rW$_lws}m=7ijh;)7kk8YyZ*bMW}wluleC}=Qq>{ z*gQ$x*CUR8HNk?Gf)HaXd`ycUDxL6$51anLnT$@sKRCF14Au{_{tx{<}&Y zFI(vcDqLHu{1s?EIr`zC^AolEdVe%#=O^OXQH(cC?JGEc05hm9iAJ|)!qxt}#N)R0 zf7p_vZ|3h@lHGrYft$`T)}!5EQ?8wo!ii?T5^n9DQ)9ME{=H=5pDsM#)K6l|P3C&T z{GCc)=>lwhkNPW1hpTehC(Y*)f)i}~HF%;_z{u|^`!P~!bY71xIH&z()e|MV*nZE@ z1?RB)|E>Bb12{>Ck5$or`!AYk_NS!NiGJ~^D(FAq?8l54>GS_6zdA|rUylhFDElPO zUY#Ft{i7?g1eEoWBBx3g2XHEO;_1z}C z1J|4JJo#-PLbzT5gdh2%P`t#`)ITK(s;o%==w)8>Ujzq#_DN2^DpxlC5dI5$82oIWqA-?+58c!1|DIE>_kuLx2=y?&{*q>C6Tc{V#nuC7uj2aOET1zT!W}vM%;lLi0Xla6alH4hQue9#$=lCL^Vvc+ z`^|6vs)5z}kKY3Im)(9%vA4@(xcxxz-Ns*zPtJIpW@{fp`U~&Rm9^OE&lykKLrrs( z8voK_3ZINqo3d3Y^OKn zs7kMS(QkwAcYpOxFZd}HGKAv?KR;1OKRkoyhy2D*8~uF#SqNXEturAagN=SZe|F=4!;5|!{BrA`)c$+D;A0{6K>nAqpJ5-Kck_c^ z9sPdtXM^u2|F@49|8c!_y%&5N{p$YDVS9aW^Tles$s2EHqdkDd<-@K2V19V!M!tTK z*3=rZ;a}?hRCx4O1#cr(PWD7AEZZ9!{8IPF!YdmU{7sd?FLi$|JTs`^Z>|jfQ)TQI zF=s&S7y0!c8~@Ac&%vGhPl?y-{OkuCd?))w-tT%WxiPjewD_bL{?^L)UvB-Se~p^I zycaukxcvLwpI)mBz7zkE+wnu`r~b;|`;8wjSJwW1-;aB#GWfqJ)&GhAe)A_=|8rZv 
zM{;%h4|vg^)*?e_KV4ml{=ucH|M|hU(eKy(v}IDR|A*60?f>WrzW%?+OaB=BSCMFE z3I9o({FEBMVF2w1!esi=GWHvl%09O<{EO|!nF9*mc+2l1Bq6;2e6*DQp#JlEW$-Wc zg8yb^@IUJXKRJahaR}%CLNEAqo;V!7-~LVFJz5FgPJV=ag;%P9=y`d9nRN`uAHWxcxlrrGIVm=d^wR-_GIppqKtx zgH2xVSpWOle?WO<^v}ZeU^<`HSZSW0YpgWS&xsG1=jZCn%=2?{f7duaSN|(z|6}uu z*8N{=|Iz2?sD8q){t2J+bM?vb_k*f9KPS!zJ3sfygWmUNlESgvUo&>+=jzMM^K*@5 zVdv*I^?rZs&(FQ5XROZ84KEgtSkHg`j~sn7Ysfr5w{K<0`MIG9=zmIL+3uOlfF%D8 zSoue#XW{o^DgN3A%=L;{L;U=TRew-%`BdrO&1s)ZMu#AXiSGYP^rplp`u)b=v=$jc z`&;>6=<(!Qyl-`#%3HfmwHKu~x0vmB%L=dabK)PWXutZFjvc=rw9jvh25K9L&W-iQ z-KG8Cjf!CojpfE`=Lj!v^k2CAtYUkC>=kElIwPL{smt>}`du7I;KNsQ_&ax#p})o6 zD<2Iw|GX)dzX^7Kn8A$p5oogclhJ;}&rCfkeLB8L)&poUB9qG+%${G8_M7_g^#^N7 z+e_46+<(cIwcs7_@4wjT&(XJV^Vz08EP%w4NPe$l^2@Rr(fs*S&U!RFW8w#MHy!T%a{-y4C${P)c?x)Z!miVW~`9%9|YfT|5)=QhVRtgEtyb%NdMm;{Qdkd2>;9D_6&mW zw0=Uw!;CWwe!-)`@MZt?!T%AGVD3LfsJcG?i*5XZpXS(?GnbS8Q%e%{|K_XzV*HZ( z!zS4HU#LGM&X{i3r)`*S8-EVF=r8!Wy**$?EoA+8^8}nfyYc@Qc7C+sKTn2^oxl3+ zAA8H6$m92I3m;9^H#ukB;<5ZqH2dF8{*bs8r)hwf>KB zn7e5xdtaE4?Vs&0ar|76&y z;QQ%sTmR+$>lsi^!6Qun7t0Ubm!7kp(gvapf4}*=pZ}ZQ_*d@y3gPF+KO1~M`SJ7rv%&XU|MK&{w88f~|N15h5c9_) zvC4Vc;2&l4_ne{yuT@8&3~uuK8#((kREygaI?etN)9}8gMMz+|_A|JG`iJ|k8+qDC z;2P`rk>}q_{{L7p%os7&6WhZ}b0Gds{=aZ)j=aoc{!eN3MWM&uI(Ax|J`&F#Ci4EN z#L?QHfcE1>r1tAmYOVQ2zny>Z>`q=jttmf5{&VsD&lBqV33>d=kfQjb`N!^e&f|Bo zSr*rRrutrWUjN7dvIK4V*SLQYF30;;Xmz}~Ox=H~{pD^p{rRLn*E`Jp4}1Hk$f$Ix zA1)5_e|zBI9ZY|ukQ_tD!~ZRVN%l^)&dW#o6^Un2+c4$)=L^ zCF%72irN$Acsp~sSznI+$_TUS|4Y5*KQ{XP=3h%HgU{E$GNd{NV_v8HR{SM>8=aT< zCj(UlYQI_DC!jh|?RsOqRAzJ1ziRVQ=8m@U@&w8L>`PupCyBizaXU$rv49hNF<3?a zub;|7YE9raEH?`}Pz9Ibr7b^WQdXYM` z*8FcuKT+>*tWx%2N%&g-)Yg{MKPem?s*&zgZ`|l#gvHD8pLVcnQ^aB7z^ADxHA49T z+xYd9Wc_0~8;<}h;rbpf+!24zU5^)|+xYnjC;F*+5q(VABM7d|!-hX!zj1<3+wY=& z2cJK*K_Q#Gzxf{@oSXOkq-bGX4sUUG$Una^351wNEg9#<@l%hl(te8h4M~#xhx|&@ zu{|ig)N{E0&Ijk^;(xm6Rrtqs#x&S|AVlH$2ck#M9~C{aYW0H)lm2e2y3M=fM$X?5 z{a<&1pQ$U657<@7_!s*>b=3Z=+inb)Di>$?D&J6VJ1Jke_Li#zxpMny*PKh=^sS-+@9;>`Qu&Y`lVaRG#5XA 
zl%}|a%9Te8Y~7By^5jNA!R$|K`*V8>2)7{pHz4Yp!sLI+>nHytPkt_~qx~jj`e_<% z2yc8!`wdQVkpD5GzdkQd{u!qB=y9Vz1)s--)n z+n-!1D}8T_azOsAK2G_GFeT0>nqg#ag-qt|0VU4 zbNSy>$Mpvo`21wBf4I(E zpUb3|2qyk(6UzS;o*7W%3Gt+^zQ;&J$@8d*zmwEI%AZS|p}yxq8OXlkarm{W5%+Ti ztv>_qr;`q)bXx7LqyDD)8VWzH{{Zhxr1U)aEM1QmbRS}xUL5L6lI8t}dx=Allm6VA zBQK4mA>U7q&);_B{vQyRqq%wO2b9BnwvJzZ{#V@Jz=Yx)t1a(+DsTNZT|bx#uYS(H zi6f-`30uD_$v;M!8~(4JBtZVzeiDB=%7)2S{RvdIv6PKpVfZiEbaFob>ok77`nkzD z_=k^QVfYvDpE#ELR4Q|1Hk=JSvHVRYO-3clJRdl;p6x%EDV^lC_@CEXD{0Ux zm>N+qdHU_Y`}w~bfOGh7mH$jZm40*1c#z5H5CkdV9jX6C1^h?;aQ_Dfwy7+o+K%VsSA^5a^Pew=W9Md1&fKVJk_$!>?r&T5R!PF|`w=pN)f9|OB zU(@lvB){{&DHmm^NdDkI<8+1Ad#L2J!Q{Rspg74R#Y;UqkpE=g|9KViC#|ne)t}@2 z{lZmfG?ddn_om|c4GQ|5KYtmEhr{RhpW5{=VN3G&k{A5679DE2|5c}00yh5neSh#U z)*jA3zxDs|{(ktAmC?`7Ke@@zfHHXWu zIzN~q>c&4>Ko5uSw|}>>GWtt>zga?6{28tc{+mAbr>Oe--(}l>QDyY2^DkxL*D8bW zw|`E{S>f{Ubbo2e9+W>n`y*40w!i0j$=``;f716)pRP>)xcz0ckwb)!KW_Tlum4)m z3AW$-`u~Y4e|r4!ntxc@PjLCM<)L>r`-SUI%Io+nB|nxr;ZNE6>sZRajsC9un(OCo z{2%VhuM*}7C%=|Hmh-9>zmJB1Y@wWNg|Ff213~4*-AQ>km7pwC_n(}ibD<|FJwx-()sk-roLceJY*2naZv| zOZc(o%izDh6#Xe482XFP2lXFV7*@V9iI*?rAIw;|FMHpRn8xBQ^tE{bca?%K;TiHO z{e4V)+JI|KhA%tWAJoV#`oqMhwf?*}Fd6&*nZEr$EPLXY@+_?EJ(2qtO+D_n$)flWlCX4?;a21q;og1_cHI7P|A6}5&htoKx%D%HCyMPO3z`0N;=itF z`cnS=AGh+4o#6YgLtgW@O#jpSpMLvC)$RZK?Z1)%S5AIy^dIq(Up*gLZrC$guJZzj zH=!SABhw_WNX~nlf7H)r_~&GAd7UV+QPJl6%`{%8#gqOC6mP67N553NvEIng&+P}Q z+5VK}<6oxy=2HB_@)`Sumv_=XaJs@nkiR%a+dd?GHviaE3V#j&{*v3@JV%@OyTi&i zdU^Rm{=u^=O?&_QEZzp&*5rFi!57~*+s65EX)*l?pll=|n#qfbj@&3U1P3@mQeiiahwwIc(PzP@K%MbEzbt(Q~ z|0VE}Y|s8n{M*a*cOm@}o|(@0OU>Z^d}1?`U-zPz(_;VETPtbME0_|gpx^vOjkiFx zi6L%3>J-<21^=9)+4;+Un!g0@zerF@5OZL*e_it{Pv0N*lRs_$43!)IE#&y(ugKn6 z=KIsyaLCvXzV;W{PmeSGY4%UvUoj5EhuYe5{FC}8raOawIsGHc$G=SZYf9N4SU#iw zczHMaXDU2|hyBRxCvLxC#g32YYutWtEJ)b`?I$Q_KZ*CRHSLx6u{fdS{48ldy^qOj z_4bn^<}`i+@BdEnr-5*KbVA#JX+KeisHyeqH%BPzb8EU)GGb{ldThMs&JA zP5Z-?PcE+%rva$k`i+5;0z2Y$en?kzi#Z?E?@I)H?H6T0?))&Df4K2a+h6O-*$)=F zeDPQOzyD%}zXd|(6}CGyf1Fav{z$7L`|GJ7`>TFQIsQrg$?Pv~|D+h&Lb81P%g|ru 
z{(2#>d`5rs@^11osPGWcMWyLy_SaU=_EQKn0`0G8QJDP(eY|`z|2}hxVSmA1Gmcz! ze?<3|f-m8j?X@!0{)0n)^M4xuf%pGn{*nGi4zB;*{XujI;{RU1@dN8=;fJo@HSolz{p2UD|M=QpYm)v08vOckvcDcz z!@m&Re^q1V9PqKfsPf?TLwSXr37=|i_r&$5uKnXVes=92C1GCn4=hCS?Akx2><_`l zOF4cR_E5V{-;&<7e@uI>g+aM5y7o`Pny&o=a{o2+v)ixQ|F3KR7~fy*+CSd*TX+2@ z>2>AiZxdww2Nt8o=-NMB`^OwVGDAsGkk%i%>py|#C(7v$-6X{SuKm-sf3#4<0lWQ| zW--&d_D|RTK}Wn)L<&OEetA3Wf5C4dja~bvYyWiZpKOOW$A@;~Nq7INyZ<$boC&gG zWD95gr@Q-K=>OyAzo`9x)Owzc+ph0?_wUsD&wYIVxfm=uzThLt8@T`VWRU-l$N10e zpK|`kt?NJK{AaK{xjRS(^72ml8zEgoc|AO zy2#w``Z$ZXWgyD{CH=2IZu)D%5&!c~3KRIIpZ^A}KLGFlPV%R@!06HM+Xra>>p;@~ zsY67nJ%Gyd^92lEOMAAncUZdQf2jXYIsHL&rT#yD;|JBl_(8ATpHFP{lb;mA@%8^v z=aoQ%UpG$vKL^PF=lN3pvl{PAKn=w7!ma;l`=NV&E69HE7(e5N%Gf{U#?RLApO<%P ze^`Oy!u2CbQQot4{P)yf2q5(QRp|I%*8i9L+qRzH$>W^;ecAE<{p|eJ{~Gu4Kgj=A zo@Wd7e=)~@e*TKfH;2dDla24P(Te-i|LXAZpYy{mr3G(0?fexV|Hte6l^*}AIe+!H zIrky`YG_AayuMF10)D>FJIhW zsXxDjRfnHHrA7l}Z`$`hEf2bz;gt%pFknPF{QHu5At6o4(cew~f#D3+VpNfc~p!DRuw6jefuH z$3)1HqHdtC-due|^_~aY@Vb{_*bkOP!XFpY-1K$;-R-{|fkz2!?(?jnALm@ULL~M&Tbk z{`!4?c{6e^T>o^xU+Xje4Ohm0zx9XpmBD}2YyNwpuRnC>-%0?>=^qqY^}zXWXRN=c zn5*%ng7L4+{2vH@b5Pk^e(O(EOY%hc%bfcEU)tCE;T7DUG-_@ap~*wC|2U~5_a7^0 zf5oL}X9@p1+x*pU{g9ey+GcP29jn;R@GrE#Uj!@0kFEc8 z_fPta_H@%9e)gY@|Hsq*H3-G=uQUDk29cC-`^U?FPJGQipEelWJ^~%~(yp6cJ@M;LDfx_;$=5PX9-k{XlrQJ^$cf z;Q7lZw%pxEok@eN>yQpKbO-}e@Oc2t@r&ncAD8@xzhe5V7mL-_s`gIfi{BCVzXh1y zSmWcr{Nib<_?nLUemBWuJ<&driZ#lfMRv%CU0!)Q$$agSgr8}0+0de&%%;BRQWT}; zzBF&d?tgH@|8JSRr1d$3Xz@x6Ot|*%-<+IB*5gb45<|XWU^vmgKvdQ)*_Vt@)CTn{ zOno>oVxDiFwbh8j?Da~YQ++;9k|kBv-hPkT+Y1J_8~Bt1A6+-$+RwHBA)zqPLcg7V zl>gtinB!snMx`IiF)q#h+@alN*574813y@K-Hx8grN$4K8l~W6h>sse{R9vGI!Au4 z+!nC^(A?6o^7BJGKMc<1MzsDL>{$AVwtm0y5B?#9e*|1W$IhQN;($@kezmP1yv*!Z zYyZ_8L;V5IhVWm@?}Qt_VU`-dHNCSon&VslFvj7|>yk|Cx&5Y#ItN_F-?Xx%ec*wf|Ca`LxQj_+C!?WC}V2L5kT#noY>f>@`@zbOos;O3v}Py#;b0Z**WA#l<1ZYkg8mb3{*y|X z3}xjPD5bVm*&{S2)Gr}>p`lEIji$XnuVwwsNqz|&%#Qm*%lZo?vuE!G>5IdJq2v7t zG+!Au@P~i@s3-iV2)@_;Km2_a^1E|w+# 
zne%&HO;KRtu3t#3t@LYZ$bV`pzA3kUq^pSos6S2r(>ZLtWPp92c^12`t>1_L1{#D2 zy|mH~Ke6{W7+~u?R7>&mZM6Sr(l7UOls!%JT|R%8^FL$%w)yE~yv)ppcO>&=KnBu$ zeDv7q$(wfb@nXKS)VLoGzB)$pN1=N;gQ}uR`X_-9^l47aW|hB86)K%Z&*qk}{hx0h z9E!}4gUJ4bU#b4a^{IPnXL2U?=j&rM{vRLvlkEEpRi%R8>2Uk= zJIC4nG_CvD`m9KK7@5|11?kt3DO)u&?K&#fn1O{@gm@#y=YG3H`I@n)YXH z7qdTiS89L4mRl122n3(xmGtH49@G8|l3%FJ3YFg`&FE7*S8k`XBW6{Bm0&4 zf6tHgv19?+tAP4CjhpsseV1v!O8sisuQG1fucNP-_ABXQ7p}MDN$l4*I@*4v`9TY9 zjxV0;aQpSSgEt-5_0#%|=K3kz7`A?Tq<5^=?~m8|>5D$=r?9}TPu-+e+ppUvy#2b1 z*{}6I%zoWlsr@Rxb*pK=lDv}r+PKZMUxVZq>a#-S_h*N0@7k|Y>Rs*E+I4FDqx`Px zr)kGZ>^XR6NdKeCI@o^QjWmSYug3jfE$0Czi2Vw`X>V^B*y-W_A>Kyzt4Tl1kM*&X z4YF59QD3KV(|(0Lru{1Qt6{&&xM9CGer?*Xq>rgfm3k)j>zf^Izta4m6y2H3Kkjh* z^~VQqIj;6A>@w}w(OqHoYqNK(*8h)}{rZxR{VL9}>(h*676i5Zx^u$YuMaT$6>ekp z>+O}=uW;U-ru|CtO7?5>PSbu3l3##Xq4N8yLw9uTS1IabkG5NXt>0LQ{VEoP^grsF z9c;huMH<5G*B#mO&vO6SvpuUalXGZ^mcs!6yAbYui<3$=b?dQ?kO#4~tcf)>`al?LY9y9G{8V}s~&T=L8 z^U;pBpJ{%=D07bf?Ei1?Y+~ywt~h>)(~$6?rU^98hs%qSNTEu=rbTrkD32<9sRX%z zs2f529Dk6ABzCYe#Mj_-GA=- z{R74Q=U(=EuZ^`B|B)I%9|QbX^<+tVZ(;tvn{t^%{%iC7udlz}^!MWb`1kjuDe?GW z^AGB8nOlCW{}$D6@3O_EwlfV~^6$UW*MIZ(zqVy7;?(o|hj#yQd4K=aTED)E{wuAg z(U_66ewM$#Bz65PZ?78V#cjK*<-f+Q|BjZ7r*0WaB-^vF{;S;If2jw4;`*<|EBUW9 z-zR30_{I8f6Hw3i*XRUykuG)r{nz3FyHsX@1pg9#a{h9 zi|U8H4@B*F#aUlh+vXfO+sgJY(D+K}ae4DrJzu3IJ6ZYPD3Z1NS47`0SA9El{^RQ3 zXP@t%3A+DMIaF`Q^NXiUTrKYi`@iPL{5 z{UYoB_K3TG`s@e3f7ND4=O=FdHE-(wxVa$X&u9NqRQ-=vW;Xqjv4Tf32)PFVYRLqx|0bQxBLDHwN7I zX*5RN{U~AeS7=>m?!PL;{LE{=O`{dAXg{DD$oB)%&D3h;&wlJ**#0ewkncZkeV>Bf z&Zq|5^7S5S`7ee22c2*=Y~IgK>8SzjU(ok7YJYIotH0i_{UMWG_1+O3Zkhg`vp-C| zZQg%I>8anj=>^sw)sDLKFpp0`0msJU>n>c#-yhOh|DBQR{1VkL|Na@*>moPa%I)XB z|6DQqzs>FE`}bGO{>9w>6Dw$cUv59&e^qJu!}>pcKSn*I_xpCCq&%oOkK6GH+rJxR z-|kP&9^RAk{g%NuZ(d^d%d=Y&Quh4&YkRY7k>Z%|x70{(y?uIpR#Ta}nZ_y|7F7P? 
z`<(mr{lKOxIaxS)>(~d6v+-Y&Kug#E{Ba}QAxwIUnOn^4_a@++YTbX#`$d;_nDLO_ zH%iYJzp2$1dv!S7Kk0d7X7p))9mxwXx#=yw?8-~}_ev%&&4ai8)z6v#LiU%A>gT0x z351_6J+W%nFWzPK5nkwaO||NtpAjE<&Rc)7`G<+re!%>f%|E>TudT*(IOe~)N@V=K zQPiVdzgnI1UvhcU%P(5L%j-|`w|w*OF#a+A#_(U@Z_hCPV*Ir*G*80%-}S!!c%6>F zYD@3ATbjR@xMY}--G59Zeb7rx$NPf%A5ET?YI;2@pIf&6((Ui}+W)8|{aHO9QzkfT z&DRJ9x>$B>|Nh$omFOa~UlzIJx~ado{eO>`AHDGJn|4^~cA@c&YjFNX8Lm?W=ii=a z{zuo%QHS23@=&YDRKi65`<<{@g%;wkZ2f2IXGLQ~o!PbWsM!4bXYxc``(gKQYh~Z% z&Ae;au#S_q`O`ZT$e*8u*>A0R7N*|+nzKe6%CZ-2eXZr!z~Zrf|t!>Bl2 zk3Rc+{Px=~lGWife`5J5)Vyat`*HI(+Cr3WKW_iUZKizb_CE@0|Drl|{r6U8|Klk8 zJ95v;XfX`;S*<|BER5LzUV8GRpqb zmD&GqlznCF*YSf zyZ<^kHrXF`)_>HO!@4|nPPJ3-f1)|+!ZYrE?c1K#`>|+0$l?)aKZyGBSSvmIedVPc zZup;%YJ19Ly!L{tUz@tU`98TK{!usnHvK+z8=j4|!mV6;Udwlw^z--kx86@jyAv8S zzhe7G{QMX7>V&H!Kh*gz!h_Fr6*mT0{}slcK9ClF?*6p+ivRvhnEyBV`*(!#fB9Hi z{@*^F7Jq^N{44`_+=$kAHvN&p$Ex|JVOiV*h(4sr|C#_WSG_ z>wm<5==dkAe@eY|Jpb46_X6)9MZ6Z@%F53r-9KvO-+!;4spn7Uteuh*m1WNN>r3vp zZLurN|M~YP7yJ15-Dc{_rz=(d?2AhFfA%(?pS~73e=@aG_47mT_^a&vVT8>ew)6SJ zl^e}!;GI@|GR4Cr&=}RllA}AwiN&W{mH>T-hTf2=k~4Km&8wPuqSN( zVeOgukoEuXDC>Wrf{$omu~JkYWY-kG8!U_qgj5^*d79Z|C!kj`73aew15v znDzgGr1snU_J&KW|NHG1b*aN``+cjt{Te~@H?^6{R@f*%v+dWQSV8k|+kUn0y6v~1 z+q2wYw~&1Nnd=LyKg#XWL#+MoPiwz7H+;<6k1`xQes7|@>6k(7ugFT4lK=CsO-S#b z)~-IO&Gqo|2j$=X{;cspIlqDa=U3hOV>x7^<#&!^rOkipccqO#!$N)=_cMO?r|~nB>O8zV0cesa_KPdkNzjvaAnGe$k)m|EA5KYW+Rj|E3&EbpH7B+c5jPE3?m^|AyJO`$v-NpFcki zvv2p$B-`iDpTq3i{XEI`NB7K^i$<_Gp^C9h}`$rw+Kf!2mEj*Br^B`KN4QJF8BdIoC%2xe!!1}7p@C_zz=6aB7-0BBjJVXf*M5?;72_yIqh35g7Tz>kC%t_yy^4`)ImgCFoC;f3phAMnGO zkjUT%{787=y5I-=a3&-&_yIo>Ubrs!0Y97xi414XF?)_AMhjLh3kSJ z@WYvq$lwS3NOw+Kf!2mEj*Br^B`KN4QJF8BdIoC%2xe!!1}7p@C_zz=6aB7-0B zBjJVXf*M5?;72_yIqh35g7Tz>kC%t_yy^4`)Im zgCFoC;f3phAMnGOkjUT%{787=y5I-=a3&-&_yIo>Ubrs!0Y97xi414 zXF?)_AMhjLh3kSJ@WYvq$lwS3NOw+Kf!2mEj*Br^B`KN4QJF8BdIoC%2xe!!1} z7p@C_zz=6aB7-0BBjJVXf*M5?;72_yIqh35g7T zz>kC%t_yy^4`)ImgCFoC;f3phAMnGOkjUT%{787=y5I-=a3&-&_yIo>Ubrs!0Y97x zi414XF?)_AMhjLh3kSJ@WYvq$lwS3NOw+Kf!2mEj*Br^B`KN4QJ 
zF8BdIoC%2xe!!1}7p@C_zz=6aB7-0BBjJVXf*M z5?;72_yIqh35g7Tz>kC%t_yy^4`)ImgCFqI&WpPBEA3+Dm-}zgO7)HZ$XtCV@)ZP? zW%V_$#m8?ZzheBZX%%j0|4-MpPUs(heUVQ6-cvWUm-qJF(e^tSCNNbNcM5k+G@Mg?>uv8g$I;l%0j ziMfM&#%tNh(PI-Q5AHcWS<7a|re2)Tr*i}5c#l5L8+qQyH+9X`-Lt2r?>=?*b&1(EmsxY5UYm7R_nrCm9?$iG{Q9Q0>-E3r zZa7tPJ-?jv*UVp!CxyDXwu^=F8_&&oc=OsirZ;Wm`gS*DBh%ZQ%Rc+(OuSz(eJ*a# aFLc&!meqG?yK^Q&(?;|E^+xVB$NvF>U(YT8 literal 0 HcmV?d00001 diff --git a/lite/kernels/mlu/bridges/multiclass_nms_op_test.cc b/lite/kernels/mlu/bridges/multiclass_nms_op_test.cc new file mode 100644 index 0000000000..6618252ca7 --- /dev/null +++ b/lite/kernels/mlu/bridges/multiclass_nms_op_test.cc @@ -0,0 +1,604 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "lite/operators/multiclass_nms_op.h" + +#include + +#include +#include +#include +#include + +#include "lite/core/op_registry.h" +#include "lite/kernels/mlu/bridges/test_helper.h" +#include "lite/kernels/npu/bridges/registry.h" + +namespace paddle { +namespace lite { +namespace subgraph { +namespace mlu { + +std::vector gen_random_boxes(int box_num, int img_w, int img_h) { + std::vector boxes; + unsigned int SEED = 1; + + for (size_t i = 0; i < box_num; i++) { + float x = rand_r(&SEED) / static_cast(RAND_MAX) * img_w; + float w = rand_r(&SEED) / static_cast(RAND_MAX) * img_w; + float y = rand_r(&SEED) / static_cast(RAND_MAX) * img_h; + float h = rand_r(&SEED) / static_cast(RAND_MAX) * img_h; + float xmin = std::max(0.0f, (x - w) / 2); + float ymin = std::max(0.0f, (y - h) / 2); + float xmax = std::min(static_cast(img_w), (x + w) / 2); + float ymax = std::min(static_cast(img_h), (y + h) / 2); + boxes.push_back(xmin); + boxes.push_back(ymin); + boxes.push_back(xmax); + boxes.push_back(ymax); + } + return boxes; +} + +std::vector gen_random_scores(int box_num, int class_num) { + std::vector scores; + unsigned int SEED = 1; + for (size_t i = 0; i < box_num; i++) { + for (size_t i = 0; i < class_num; i++) { + scores.push_back(rand_r(&SEED) / static_cast(RAND_MAX)); + } + } + return scores; +} + +float Area(float box[4]) { + float xmin = box[0]; + float ymin = box[1]; + float xmax = box[2]; + float ymax = box[3]; + CHECK(xmax > xmin) << "xmax: " << xmax << " xmin: " << xmin; + CHECK(ymax > ymin) << "ymax: " << ymax << " ymin: " << ymin; + float w = xmax - xmin; + float h = ymax - ymin; + return w * h; +} + +// overlap may < 0 +float overlap(float min1, float max1, float min2, float max2) { + return ((max1 - min1) + (max2 - min2)) - + (std::max(max2, max1) - std::min(min1, min2)); +} + +float IntersectionArea(float box1[4], float box2[4]) { + float box1_xmin = box1[0]; + float box1_ymin = box1[1]; + float box1_xmax = box1[2]; + float box1_ymax = box1[3]; + + 
float box2_xmin = box2[0]; + float box2_ymin = box2[1]; + float box2_xmax = box2[2]; + float box2_ymax = box2[3]; + + float x_overlap = overlap(box1_xmin, box1_xmax, box2_xmin, box2_xmax); + float y_overlap = overlap(box1_ymin, box1_ymax, box2_ymin, box2_ymax); + float intersection_area = x_overlap * y_overlap; + return std::max(intersection_area, 0.0f); +} + +float IOU(float box1[4], float box2[4]) { + float area1 = Area(box1); + float area2 = Area(box2); + float intersection_area = IntersectionArea(box1, box2); + float union_area = area1 + area2 - intersection_area; + return intersection_area / union_area; +} + +template +void VecToFile(const std::vector& vec, std::string filename) { + std::ofstream f(filename, std::ios::out); + if (!f) { + LOG(FATAL) << filename << "not exist!" << std::endl; + } + for (size_t i = 0; i < vec.size(); i++) { + f << vec[i] << std::endl; + } + f.close(); +} + +template +void ArrayToFile(const T* data, int size, std::string filename) { + std::ofstream f(filename, std::ios::out); + if (!f) { + LOG(FATAL) << filename << "not exist!" 
<< std::endl; + } + for (size_t i = 0; i < size; i++) { + f << data[i] << std::endl; + } + f.close(); +} + +void ToFile(Tensor* tensor, std::string file_name) { + int count = tensor->dims().production(); + auto data = tensor->mutable_data(); + std::ostringstream outs; + for (size_t i = 0; i < count; i++) { + outs << data[i] << std::endl; + } + std::ofstream of; + of.open(file_name, std::ios::out); + of << outs.str(); + of.close(); +} + +void FromFile(Tensor* tensor, std::string file_name) { + LOG(INFO) << " from file:" << file_name << std::endl; + std::ifstream f; + f.open(file_name, std::ios::in); + if (f.good()) { + for (size_t i = 0; i < tensor->dims().production(); i++) { + f >> tensor->mutable_data()[i]; + } + } else { + LOG(FATAL) << "can not open " << file_name << "to read" << std::endl; + } + f.close(); +} + +template +static bool sort_score_pair_descend(const std::pair& pair1, + const std::pair& pair2) { + return pair1.first > pair2.first; +} + +template +void get_max_score_index(const dtype* scores, + int num, + float threshold, + int top_k, + std::vector>* score_index_vec) { + // ArrayToFile(scores, 100, "cpu_score.txt"); + //! Generate index score pairs. + for (int i = 0; i < num; ++i) { + if (scores[i] > threshold) { + score_index_vec->push_back(std::make_pair(scores[i], i)); + } + } + + //! Sort the score pair according to the scores in descending order + std::stable_sort(score_index_vec->begin(), + score_index_vec->end(), + sort_score_pair_descend); + + //! Keep top_k scores if needed. + if (top_k > -1 && top_k < score_index_vec->size()) { + score_index_vec->resize(top_k); + } +} + +template +dtype bbox_size(const dtype* bbox, bool normalized = true) { + if (bbox[2] < bbox[0] || bbox[3] < bbox[1]) { + // If bbox is invalid (e.g. xmax < xmin or ymax < ymin), return 0. 
+ return dtype(0.); + } else { + const dtype width = bbox[2] - bbox[0]; + const dtype height = bbox[3] - bbox[1]; + + if (normalized) { + return width * height; + } else { + // If bbox is not within range [0, 1]. + return (width + 1) * (height + 1); + } + } +} + +template +dtype jaccard_overlap(const dtype* bbox1, const dtype* bbox2) { + if (bbox2[0] > bbox1[2] || bbox2[2] < bbox1[0] || bbox2[1] > bbox1[3] || + bbox2[3] < bbox1[1]) { + return dtype(0.); + } else { + const dtype inter_xmin = std::max(bbox1[0], bbox2[0]); + const dtype inter_ymin = std::max(bbox1[1], bbox2[1]); + const dtype inter_xmax = std::min(bbox1[2], bbox2[2]); + const dtype inter_ymax = std::min(bbox1[3], bbox2[3]); + + const dtype inter_width = inter_xmax - inter_xmin; + const dtype inter_height = inter_ymax - inter_ymin; + const dtype inter_size = inter_width * inter_height; + + const dtype bbox1_size = bbox_size(bbox1); + const dtype bbox2_size = bbox_size(bbox2); + + return inter_size / (bbox1_size + bbox2_size - inter_size); + } +} + +template +void apply_nms_fast(const dtype* bboxes, + const dtype* scores, + int num, + float score_threshold, + float nms_threshold, + float eta, + int top_k, + std::vector* indices) { + // Get top_k scores (with corresponding indices). + std::vector> score_index_vec; + get_max_score_index(scores, num, score_threshold, top_k, &score_index_vec); + + // Do nms. 
+ float adaptive_threshold = nms_threshold; + indices->clear(); + + while (score_index_vec.size() != 0) { + const int idx = score_index_vec.front().second; + bool keep = true; + + for (int k = 0; k < indices->size(); ++k) { + if (keep) { + const int kept_idx = (*indices)[k]; + float overlap = + jaccard_overlap(bboxes + idx * 4, bboxes + kept_idx * 4); + keep = overlap <= adaptive_threshold; + } else { + break; + } + } + + if (keep) { + indices->push_back(idx); + } + + score_index_vec.erase(score_index_vec.begin()); + + if (keep && eta < 1 && adaptive_threshold > 0.5) { + adaptive_threshold *= eta; + } + } +} + +template +void multiclass_nms_compute_ref(const operators::MulticlassNmsParam& param, + int class_num, + const std::vector& priors, + bool share_location, + std::vector* result) { + int background_id = param.background_label; + int keep_topk = param.keep_top_k; + int nms_topk = param.nms_top_k; + float conf_thresh = param.score_threshold; + float nms_thresh = param.nms_threshold; + float nms_eta = param.nms_eta; + const dtype* bbox_data = param.bboxes->data(); + const dtype* conf_data = param.scores->data(); + (*result).clear(); + + int num_kept = 0; + std::vector>> all_indices; + int64_t conf_offset = 0; + int64_t bbox_offset = 0; + for (int i = 0; i < priors.size(); ++i) { + std::map> indices; + int num_det = 0; + int num_priors = priors[i]; + + int conf_idx = class_num * conf_offset; + int bbox_idx = + share_location ? 
bbox_offset * 4 : bbox_offset * 4 * class_num; + + for (int c = 0; c < class_num; ++c) { + if (c == background_id) { + // Ignore background class + continue; + } + + const dtype* cur_conf_data = conf_data + conf_idx + c * num_priors; + const dtype* cur_bbox_data = bbox_data + bbox_idx; + + if (!share_location) { + cur_bbox_data += c * num_priors * 4; + } + + apply_nms_fast(cur_bbox_data, + cur_conf_data, + num_priors, + conf_thresh, + nms_thresh, + nms_eta, + nms_topk, + &(indices[c])); + num_det += indices[c].size(); + } + + if (keep_topk > -1 && num_det > keep_topk) { + std::vector>> score_index_pairs; + + for (auto it = indices.begin(); it != indices.end(); ++it) { + int label = it->first; + const std::vector& label_indices = it->second; + + for (int j = 0; j < label_indices.size(); ++j) { + int idx = label_indices[j]; + float score = conf_data[conf_idx + label * num_priors + idx]; + score_index_pairs.push_back( + std::make_pair(score, std::make_pair(label, idx))); + } + } + + // Keep top k results per image. + std::stable_sort(score_index_pairs.begin(), + score_index_pairs.end(), + sort_score_pair_descend>); + score_index_pairs.resize(keep_topk); + // Store the new indices. + std::map> new_indices; + + for (int j = 0; j < score_index_pairs.size(); ++j) { + int label = score_index_pairs[j].second.first; + int idx = score_index_pairs[j].second.second; + new_indices[label].push_back(idx); + } + + all_indices.push_back(new_indices); + num_kept += keep_topk; + } else { + all_indices.push_back(indices); + num_kept += num_det; + } + conf_offset += num_priors; + bbox_offset += num_priors; + } + + if (num_kept == 0) { + (*result).clear(); + (*result).resize(1); + (*result)[0] = -1; + return; + } else { + (*result).resize(num_kept * 6); + } + + int count = 0; + + conf_offset = 0; + bbox_offset = 0; + for (int i = 0; i < priors.size(); ++i) { + int num_priors = priors[i]; + int conf_idx = class_num * conf_offset; + int bbox_idx = + share_location ? 
bbox_offset * 4 : bbox_offset * 4 * class_num; + + for (auto it = all_indices[i].begin(); it != all_indices[i].end(); ++it) { + int label = it->first; + std::vector& indices = it->second; + const dtype* cur_conf_data = conf_data + conf_idx + label * num_priors; + const dtype* cur_bbox_data = bbox_data + bbox_idx; + + if (!share_location) { + cur_bbox_data += label * num_priors * 4; + } + + for (int j = 0; j < indices.size(); ++j) { + int idx = indices[j]; + (*result)[count * 6] = label; + (*result)[count * 6 + 1] = cur_conf_data[idx]; + + for (int k = 0; k < 4; ++k) { + (*result)[count * 6 + 2 + k] = cur_bbox_data[idx * 4 + k]; + } + + ++count; + } + } + conf_offset += num_priors; + bbox_offset += num_priors; + } +} + +void test_multiclass_nms(float score_threshold, + int nms_top_k, + int keep_top_k, + float nms_threshold, + bool normalized, + float nms_eta, + int background_label, + int batch_size, + int class_num, + int num_boxes, + int box_size, + int core_num) { + // prepare input&output variables + Scope scope; + std::string bboxes_var_name = "BBoxes"; + std::string scores_var_name = "Scores"; + std::string out_var_name = "Out"; + std::string out_num_var_name = + "nms_out_num"; // must be this name,corespond with + // lite/operators/multiclass_nms_op.cc + auto* bboxes = scope.Var(bboxes_var_name)->GetMutable(); + auto* scores = scope.Var(scores_var_name)->GetMutable(); + auto* out = scope.Var(out_var_name)->GetMutable(); + auto* out_num = scope.Var(out_num_var_name)->GetMutable(); + + std::vector bboxes_shape = {batch_size, num_boxes, box_size}; + std::vector scores_shape = {batch_size, class_num, num_boxes}; + std::vector out_num_shape = {batch_size}; + + bboxes->Resize(bboxes_shape); + scores->Resize(scores_shape); + out_num->Resize(out_num_shape); + + std::vector bboxes_vec = gen_random_boxes(num_boxes, 1024, 1024); + std::vector scores_vec = gen_random_scores(num_boxes, class_num); + + for (size_t i = 1; i < bboxes_vec.size(); i++) { + 
bboxes->mutable_data()[i] = bboxes_vec[i]; + } + for (size_t i = 1; i < scores_vec.size(); i++) { + scores->mutable_data()[i] = scores_vec[i]; + } + + // initialize op desc + cpp::OpDesc opdesc; + opdesc.SetType("multiclass_nms"); + opdesc.SetInput("BBoxes", {bboxes_var_name}); + opdesc.SetInput("Scores", {scores_var_name}); + opdesc.SetOutput("Out", {out_var_name}); + opdesc.SetAttr("background_label", background_label); + opdesc.SetAttr("keep_top_k", keep_top_k); + opdesc.SetAttr("nms_top_k", nms_top_k); + opdesc.SetAttr("score_threshold", score_threshold); + opdesc.SetAttr("nms_threshold", nms_threshold); + opdesc.SetAttr("nms_eta", nms_eta); + opdesc.SetAttr("normalized", normalized); + + auto op = CreateOp(opdesc, &scope); + // out_ref->CopyDataFrom(*out); + + operators::MulticlassNmsParam param; + auto bboxes_name = opdesc.Input("BBoxes").front(); + auto scores_name = opdesc.Input("Scores").front(); + auto out_name = opdesc.Output("Out").front(); + std::vector output_arg_names = opdesc.OutputArgumentNames(); + + param.bboxes = bboxes; + param.scores = scores; + param.out = out; + param.background_label = opdesc.GetAttr("background_label"); + param.keep_top_k = opdesc.GetAttr("keep_top_k"); + param.nms_top_k = opdesc.GetAttr("nms_top_k"); + param.score_threshold = opdesc.GetAttr("score_threshold"); + param.nms_threshold = opdesc.GetAttr("nms_threshold"); + param.nms_eta = opdesc.GetAttr("nms_eta"); + if (opdesc.HasAttr("normalized")) { + param.normalized = opdesc.GetAttr("normalized"); + } + const std::vector& priors = {num_boxes}; // batch_size + std::vector result; + multiclass_nms_compute_ref(param, class_num, priors, true, &result); + + // trans + Tensor bboxes_trans; + bboxes_trans.Resize({bboxes->dims()}); + transpose(bboxes->mutable_data(), + bboxes_trans.mutable_data(), + {static_cast(bboxes->dims()[0]), + static_cast(bboxes->dims()[1]), + static_cast(bboxes->dims()[2])}, + {0, 2, 1}); + bboxes->CopyDataFrom(bboxes_trans); + + Tensor scores_trans; + 
scores_trans.Resize({scores->dims()}); + transpose(scores->mutable_data(), + scores_trans.mutable_data(), + {static_cast(scores->dims()[0]), + static_cast(scores->dims()[1]), + static_cast(scores->dims()[2])}, + {0, 2, 1}); + scores->CopyDataFrom(scores_trans); + + LaunchOp( + op, {bboxes_var_name, scores_var_name}, {out_var_name, out_num_var_name}); + + // ToFile(out, "nms_out_mlu_before_trans.txt"); + // out trans + Tensor out_trans; + out_trans.Resize({out->dims()}); + transpose(out->mutable_data(), + out_trans.mutable_data(), + {static_cast(out->dims()[0]), + static_cast(out->dims()[2]), + static_cast(out->dims()[1])}, // 0 2 1 on mlu + {0, 2, 1}); + out->CopyDataFrom(out_trans); + + // ToFile(out, "nms_out_mlu.txt"); + // ToFile(out_num, "nms_out_num_mlu.txt"); + // VecToFile(result, "nms_out_cpu.txt"); + + // auto out_data = out->mutable_data(); + int num_box = out->dims()[1]; + int match_count = 0; + std::vector matched_cpu_index; + for (int i = 0; i < num_box; i++) { + float mlu_box[4]; + mlu_box[0] = out->mutable_data()[i * 6 + 2]; + mlu_box[1] = out->mutable_data()[i * 6 + 3]; + mlu_box[2] = out->mutable_data()[i * 6 + 4]; + mlu_box[3] = out->mutable_data()[i * 6 + 5]; + bool match = false; + for (size_t j = 0; j < num_box; j++) { + // if j th cpu box has matched some mlu box, do not use if to match other + // mlu box + if (std::find(std::begin(matched_cpu_index), + std::end(matched_cpu_index), + j) != std::end(matched_cpu_index)) { + continue; + } + float cpu_box[4]; + cpu_box[0] = result[j * 6 + 2]; + cpu_box[1] = result[j * 6 + 3]; + cpu_box[2] = result[j * 6 + 4]; + cpu_box[3] = result[j * 6 + 5]; + if (IOU(mlu_box, cpu_box) >= 0.9) { + match = true; + matched_cpu_index.push_back(j); + break; + } + } + if (match) { + match_count += 1; + } + } + EXPECT_NEAR(match_count, num_box, 0); +} + +TEST(MLUBridges, multiclass_nms) { + int background_label = -1; + int keep_top_k = 100; + int nms_top_k = 1000; + float score_threshold = 0.01; + float nms_threshold 
= 0.45; + int nms_eta = 1; + bool normalized = 0; + int batch_size = 1; + int num_boxes = 22743; + int class_num = 80; + int core_num = 4; + int box_size = 4; + + test_multiclass_nms(score_threshold, + nms_top_k, + keep_top_k, + nms_threshold, + normalized, + nms_eta, + background_label, + batch_size, + class_num, + num_boxes, + box_size, + core_num); +} + +} // namespace mlu +} // namespace subgraph +} // namespace lite +} // namespace paddle + +USE_SUBGRAPH_BRIDGE(multiclass_nms, kMLU) diff --git a/lite/kernels/mlu/bridges/paddle_use_bridges.h b/lite/kernels/mlu/bridges/paddle_use_bridges.h index be5c64b3b7..cddd448921 100644 --- a/lite/kernels/mlu/bridges/paddle_use_bridges.h +++ b/lite/kernels/mlu/bridges/paddle_use_bridges.h @@ -43,6 +43,9 @@ USE_SUBGRAPH_BRIDGE(flatten, kMLU); USE_SUBGRAPH_BRIDGE(flatten2, kMLU); USE_SUBGRAPH_BRIDGE(reshape, kMLU); USE_SUBGRAPH_BRIDGE(reshape2, kMLU); +USE_SUBGRAPH_BRIDGE(multiclass_nms, kMLU); +USE_SUBGRAPH_BRIDGE(density_prior_box, kMLU); +USE_SUBGRAPH_BRIDGE(box_coder, kMLU); #ifdef LITE_BUILD_EXTRA USE_SUBGRAPH_BRIDGE(gather, kMLU); USE_SUBGRAPH_BRIDGE(lrn, kMLU) diff --git a/lite/kernels/mlu/bridges/utility.h b/lite/kernels/mlu/bridges/utility.h index fe886c5e44..ba187cfa24 100644 --- a/lite/kernels/mlu/bridges/utility.h +++ b/lite/kernels/mlu/bridges/utility.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include -- GitLab