Unverified commit f99e28b4, authored by zhupengyang, committed via GitHub.

[NPU] support expand op (#3594)

Parent commit: e9b94f04
......@@ -41,7 +41,6 @@ add_kernel(slice_compute_arm ARM basic SRCS slice_compute.cc DEPS ${lite_kernel_
add_kernel(cast_compute_arm ARM basic SRCS cast_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(squeeze_compute_arm ARM basic SRCS squeeze_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(unsqueeze_compute_arm ARM basic SRCS unsqueeze_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(expand_compute_arm ARM basic SRCS expand_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(reduce_mean_compute_arm ARM basic SRCS reduce_mean_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(stack_compute_arm ARM basic SRCS stack_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(affine_channel_compute_arm ARM basic SRCS affine_channel_compute.cc DEPS ${lite_kernel_deps} math_arm)
......
......@@ -4,6 +4,7 @@ add_kernel(feed_compute_host Host basic SRCS feed_compute.cc DEPS ${lite_kernel_
add_kernel(fetch_compute_host Host basic SRCS fetch_compute.cc DEPS ${lite_kernel_deps})
add_kernel(reshape_compute_host Host basic SRCS reshape_compute.cc DEPS ${lite_kernel_deps})
add_kernel(multiclass_nms_compute_host Host basic SRCS multiclass_nms_compute.cc DEPS ${lite_kernel_deps})
add_kernel(expand_compute_host Host basic SRCS expand_compute.cc DEPS ${lite_kernel_deps})
add_kernel(shape_compute_host Host extra SRCS shape_compute.cc DEPS ${lite_kernel_deps})
add_kernel(is_empty_compute_host Host extra SRCS is_empty_compute.cc DEPS ${lite_kernel_deps})
add_kernel(crf_decoding_compute_host Host extra SRCS crf_decoding_compute.cc DEPS ${lite_kernel_deps})
......
......@@ -12,24 +12,23 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/arm/expand_compute.h"
#include "lite/kernels/host/expand_compute.h"
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
namespace host {
void ExpandCompute::Run() {
auto& param = Param<operators::ExpandParam>();
template <typename T, PrecisionType PType>
void ExpandCompute<T, PType>::Run() {
auto& param = this->template Param<operators::ExpandParam>();
const auto* x = param.X;
auto* out = param.Out;
std::vector<int> expand_times = param.expand_times;
const float* src = x->data<float>();
float* dst = out->mutable_data<float>();
const T* src = x->template data<T>();
T* dst = out->template mutable_data<T>();
int dims = expand_times.size();
DDim in_shape = x->dims();
......@@ -42,7 +41,7 @@ void ExpandCompute::Run() {
for (int k = 0; k < expand_times[i]; ++k) {
memcpy(dst + (j * expand_times[i] + k) * inner_num,
src + j * inner_num,
sizeof(float) * inner_num);
sizeof(T) * inner_num);
}
}
inner_num *= expand_times[i];
......@@ -53,20 +52,27 @@ void ExpandCompute::Run() {
for (int k = expand_times[i] - 1; k >= 0; --k) {
memcpy(dst + (j * expand_times[i] + k) * inner_num,
dst + j * inner_num,
sizeof(float) * inner_num);
sizeof(T) * inner_num);
}
}
inner_num *= expand_times[i];
}
}
} // namespace arm
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(
expand, kARM, kFloat, kNCHW, paddle::lite::kernels::arm::ExpandCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
// Instantiate the host-side expand kernel for float32 and register it for the
// `expand` op on the kHost target.
using expand_float =
    paddle::lite::kernels::host::ExpandCompute<float, PRECISION(kFloat)>;
// kAny data layout: expand only tiles raw element blocks with memcpy, so the
// kernel is layout-agnostic.
REGISTER_LITE_KERNEL(expand, kHost, kFloat, kAny, expand_float, def)
    .BindInput("X",
               {LiteType::GetTensorTy(TARGET(kHost),
                                      PRECISION(kFloat),
                                      DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kHost),
                                       PRECISION(kFloat),
                                       DATALAYOUT(kAny))})
    .Finalize();
......@@ -19,16 +19,18 @@
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
namespace host {
class ExpandCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
// Host-side kernel for the `expand` op: tiles the input tensor along every
// dimension by the per-dimension repeat counts given in
// operators::ExpandParam::expand_times.
//
//   T     - element type of the input/output tensors (e.g. float).
//   PType - precision tag the kernel is registered under.
//
// Registered with DATALAYOUT(kAny) because expanding only copies contiguous
// element blocks and never interprets the layout.
template <typename T, PrecisionType PType>
class ExpandCompute
    : public KernelLite<TARGET(kHost), PType, DATALAYOUT(kAny)> {
 public:
  // Reads ExpandParam (X, Out, expand_times) and fills Out with the tiled
  // copy of X. Defined in expand_compute.cc.
  void Run() override;

  virtual ~ExpandCompute() = default;
};
} // namespace arm
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
......@@ -49,6 +49,7 @@ lite_cc_library(subgraph_bridge_fill_constant_op_npu SRCS fill_constant_op.cc DE
lite_cc_library(subgraph_bridge_fill_constant_batch_size_like_op_npu SRCS fill_constant_batch_size_like_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_increment_op_npu SRCS increment_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_compare_op_npu SRCS compare_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_expand_op_npu SRCS expand_op.cc DEPS ${npu_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_shape_op_npu SRCS shape_op.cc DEPS ${npu_subgraph_bridge_deps})
......@@ -87,6 +88,7 @@ set(npu_subgraph_bridges
subgraph_bridge_fill_constant_batch_size_like_op_npu
subgraph_bridge_increment_op_npu
subgraph_bridge_compare_op_npu
subgraph_bridge_expand_op_npu
CACHE INTERNAL "npu_subgraph_bridges")
message(STATUS "+++++ npu_subgraph_bridges: ${npu_subgraph_bridges}")
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/npu/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/utility.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace npu {
// Bridges the paddle `expand` op onto the NPU graph as an HiAI Tile node:
// input `x` is tiled by the constant repeat counts from `expand_times`.
int ExpandConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  VLOG(3) << "[NPU] Converting " + op_type + "...";

  // Fetch input/output names and the per-dimension repeat counts.
  auto x_name = op_info->Input("X").front();
  auto x = scope->FindTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
  auto expand_times = op_info->GetAttr<std::vector<int>>("expand_times");

  // Input node: reuse it when already present in the graph, otherwise
  // create it from the tensor.
  std::shared_ptr<Node> x_node =
      graph->Has(x_name) ? graph->Get(x_name) : graph->Add(x_name, *x);

  // Constant node holding the repeat counts consumed by Tile's `w` input.
  std::shared_ptr<Node> w_node = graph->Add(out_name + "/w", expand_times);

  // Tile node that produces the expanded output under the output's name.
  auto tile_node = graph->Add<ge::op::Tile>(out_name);
  auto tile_op = tile_node->data<ge::op::Tile>();
  tile_op->set_input_x(*x_node->data());
  tile_op->set_input_w(*w_node->data());
  return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace npu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(expand,
kNPU,
paddle::lite::subgraph::npu::ExpandConverter);
......@@ -38,6 +38,7 @@ USE_SUBGRAPH_BRIDGE(elementwise_add, kNPU);
USE_SUBGRAPH_BRIDGE(elementwise_sub, kNPU);
USE_SUBGRAPH_BRIDGE(elementwise_mul, kNPU);
USE_SUBGRAPH_BRIDGE(elementwise_div, kNPU);
USE_SUBGRAPH_BRIDGE(expand, kNPU);
USE_SUBGRAPH_BRIDGE(fusion_elementwise_add_activation, kNPU);
USE_SUBGRAPH_BRIDGE(fusion_elementwise_sub_activation, kNPU);
USE_SUBGRAPH_BRIDGE(fusion_elementwise_mul_activation, kNPU);
......
......@@ -84,7 +84,7 @@ class ExpandComputeTester : public arena::TestCase {
}
};
void test_expand_3dim(Place place) {
void test_expand_3dim(Place place, float abs_error) {
for (std::vector<int> expand_times : {std::vector<int>({2, 3, 1}),
std::vector<int>({2, 2, 2}),
std::vector<int>({3, 1, 2})}) {
......@@ -93,7 +93,7 @@ void test_expand_3dim(Place place) {
for (int W : {4}) {
std::unique_ptr<arena::TestCase> tester(new ExpandComputeTester(
place, "def", expand_times, DDim({C, H, W})));
arena::Arena arena(std::move(tester), place, 2e-5);
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
}
......@@ -101,7 +101,7 @@ void test_expand_3dim(Place place) {
}
}
void test_expand_4dim(Place place) {
void test_expand_4dim(Place place, float abs_error) {
for (std::vector<int> expand_times : {std::vector<int>({2, 3, 1, 4}),
std::vector<int>({2, 2, 2, 2}),
std::vector<int>({3, 1, 2, 1})}) {
......@@ -111,7 +111,7 @@ void test_expand_4dim(Place place) {
for (int W : {4}) {
std::unique_ptr<arena::TestCase> tester(new ExpandComputeTester(
place, "def", expand_times, DDim({N, C, H, W})));
arena::Arena arena(std::move(tester), place, 2e-5);
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
}
......@@ -121,14 +121,19 @@ void test_expand_4dim(Place place) {
}
TEST(Expand, precision) {
#ifdef LITE_WITH_X86
Place place(TARGET(kX86));
#endif
#ifdef LITE_WITH_ARM
Place place(TARGET(kARM));
test_expand_3dim(place);
test_expand_4dim(place);
float abs_error = 1e-5;
Place place;
#if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2; // Using fp16 in NPU
#elif defined(LITE_WITH_ARM)
place = TARGET(kHost);
#else
return;
#endif
test_expand_3dim(place, abs_error);
test_expand_4dim(place, abs_error);
}
} // namespace lite
......
Markdown is supported.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register to comment.