未验证 提交 9edfecaa 编写于 作者: Z zhupengyang 提交者: GitHub

[NPU] support increment, less than op bridge (#3147)

* [NPU] subgraph's precision register to kAny
上级 ff332144
......@@ -59,6 +59,8 @@ void TestCase::CreateInstruction() {
CHECK(it != kernels.end()) << "failed to create the kernel in "
<< place_.DebugString()
<< " with alias: " << alias_;
// reset final place
place_ = (*it)->place();
// prepare context
(*it)->SetContext(std::move(ctx_));
instruction_.reset(new Instruction(op, std::move(*it)));
......
......@@ -47,18 +47,19 @@ std::vector<std::unique_ptr<KernelBase>> OpLite::CreateKernels(
return kernels;
}
std::set<Place> place_set;
for (auto place : places) {
place_set.insert(place);
// Pick kernels those support any Precision and any DataLayout
place.precision = PRECISION(kAny);
place_set.insert(place);
place.layout = DATALAYOUT(kAny);
place_set.insert(place);
std::set<Place> expanded_places(places.begin(), places.end());
for (auto &place : places) {
// Pick kernels that support any Precision and any DataLayout. For example:
// kARM,kFloat,kNCHW -> kARM,kFloat,kAny; kARM,kAny,kNCHW; kARM,kAny,kAny
expanded_places.insert(
Place(place.target, place.precision, DATALAYOUT(kAny)));
expanded_places.insert(Place(place.target, PRECISION(kAny), place.layout));
expanded_places.insert(
Place(place.target, PRECISION(kAny), DATALAYOUT(kAny)));
}
std::set<TargetType> targets;
for (auto place : place_set) {
for (auto place : expanded_places) {
pick_kernel(place);
targets.insert(place.target);
}
......
......@@ -91,7 +91,6 @@ add_kernel(lookup_table_compute_arm ARM extra SRCS lookup_table_compute.cc DEPS
add_kernel(lookup_table_dequant_compute_arm ARM extra SRCS lookup_table_dequant_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(logical_compute_arm ARM extra SRCS logical_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_softmax_compute_arm ARM extra SRCS sequence_softmax_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(less_than_arm ARM extra SRCS compare_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(while_compute_arm ARM extra SRCS while_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(compare_compute_arm ARM extra SRCS compare_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(topk_compute_arm ARM extra SRCS topk_compute.cc DEPS ${lite_kernel_deps} math_arm)
......
......@@ -73,8 +73,6 @@ inline void get_mid_dims(const lite::DDim &x_dims,
(*post) *= x_dims[i];
}
}
template <template <typename T> class Functor>
void CompareCompute<Functor>::PrepareForRun() {}
template <template <typename T> class Functor>
void CompareCompute<Functor>::Run() {
......@@ -177,7 +175,6 @@ void CompareCompute_int64<Functor>::Run() {
for (int inner_id = 0; inner_id < inner_num; ++inner_id) {
int index = (outer_id * mid_num + mid_id) * inner_num + inner_id;
z[index] = CompareFunctor()(x[index], y_data);
// z[index] = x[index] < y_data;
}
}
}
......@@ -189,50 +186,78 @@ void CompareCompute_int64<Functor>::Run() {
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(less_than,
REGISTER_LITE_KERNEL(equal,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::CompareCompute<
paddle::lite::kernels::arm::_LessThanFunctor>,
paddle::lite::kernels::arm::_EqualFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(less_than,
REGISTER_LITE_KERNEL(equal,
kARM,
kInt64,
kInt32,
kNCHW,
paddle::lite::kernels::arm::CompareCompute_int64<
paddle::lite::kernels::arm::_LessThanFunctor>,
paddle::lite::kernels::arm::CompareCompute_int32<
paddle::lite::kernels::arm::_EqualFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(equal,
REGISTER_LITE_KERNEL(not_equal,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::CompareCompute<
paddle::lite::kernels::arm::_EqualFunctor>,
paddle::lite::kernels::arm::_NotEqualFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(not_equal,
REGISTER_LITE_KERNEL(less_than,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::CompareCompute<
paddle::lite::kernels::arm::_NotEqualFunctor>,
paddle::lite::kernels::arm::_LessThanFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
// Registers the `less_than` kernel for target kARM, precision kInt32 and
// layout kNCHW under the alias `def`. The kernel is implemented by
// CompareCompute_int32 instantiated with _LessThanFunctor. Inputs "X" and "Y"
// are bound as kInt32 ARM tensors; output "Out" is bound as a kBool ARM tensor.
REGISTER_LITE_KERNEL(less_than,
kARM,
kInt32,
kNCHW,
paddle::lite::kernels::arm::CompareCompute_int32<
paddle::lite::kernels::arm::_LessThanFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
// Registers the `less_than` kernel for target kARM, precision kInt64 and
// layout kNCHW under the alias `def`. The kernel is implemented by
// CompareCompute_int64 instantiated with _LessThanFunctor. Inputs "X" and "Y"
// are bound as kInt64 ARM tensors; output "Out" is bound as a kBool ARM tensor.
REGISTER_LITE_KERNEL(less_than,
kARM,
kInt64,
kNCHW,
paddle::lite::kernels::arm::CompareCompute_int64<
paddle::lite::kernels::arm::_LessThanFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(less_equal,
kARM,
kFloat,
......@@ -244,6 +269,7 @@ REGISTER_LITE_KERNEL(less_equal,
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(greater_than,
kARM,
kFloat,
......@@ -255,6 +281,7 @@ REGISTER_LITE_KERNEL(greater_than,
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(greater_equal,
kARM,
kFloat,
......@@ -266,27 +293,3 @@ REGISTER_LITE_KERNEL(greater_equal,
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(less_than,
kARM,
kInt32,
kNCHW,
paddle::lite::kernels::arm::CompareCompute_int32<
paddle::lite::kernels::arm::_LessThanFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(equal,
kARM,
kInt32,
kNCHW,
paddle::lite::kernels::arm::CompareCompute_int32<
paddle::lite::kernels::arm::_EqualFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
......@@ -26,10 +26,6 @@ namespace arm {
template <template <typename T> class Functor>
class CompareCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
public:
using param_t = operators::LogicalParam;
void PrepareForRun() override;
void Run() override;
~CompareCompute() {}
......@@ -39,8 +35,6 @@ template <template <typename T> class Functor>
class CompareCompute_int32
: public KernelLite<TARGET(kARM), PRECISION(kInt32)> {
public:
using param_t = operators::LogicalParam;
void Run() override;
~CompareCompute_int32() {}
......@@ -50,8 +44,6 @@ template <template <typename T> class Functor>
class CompareCompute_int64
: public KernelLite<TARGET(kARM), PRECISION(kInt64)> {
public:
using param_t = operators::LogicalParam;
void Run() override;
~CompareCompute_int64() {}
......
......@@ -46,6 +46,9 @@ lite_cc_library(subgraph_bridge_dropout_op_npu SRCS dropout_op.cc DEPS ${npu_sub
lite_cc_library(subgraph_bridge_layer_norm_op_npu SRCS layer_norm_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_fill_constant_op_npu SRCS fill_constant_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_fill_constant_batch_size_like_op_npu SRCS fill_constant_batch_size_like_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_increment_op_npu SRCS increment_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_compare_op_npu SRCS compare_op.cc DEPS ${npu_subgraph_bridge_deps})
set(npu_subgraph_bridges
subgraph_bridge_registry
......@@ -79,6 +82,8 @@ set(npu_subgraph_bridges
subgraph_bridge_layer_norm_op_npu
subgraph_bridge_fill_constant_op_npu
subgraph_bridge_fill_constant_batch_size_like_op_npu
subgraph_bridge_increment_op_npu
subgraph_bridge_compare_op_npu
CACHE INTERNAL "npu_subgraph_bridges")
message(STATUS "+++++ npu_subgraph_bridges: ${npu_subgraph_bridges}")
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/npu/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/utility.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace npu {
// Converts a Paddle `less_than` op into a `ge::op::Less` node of the NPU graph.
//
// ctx:    subgraph build context; must be non-null. It is cast to Graph* and
//         used to look up or create the input nodes and to add the output
//         node.
// op:     the op being converted; must be non-null. Supplies the op info
//         (input/output variable names) and the scope holding the tensors.
// kernel: not used by this converter.
//
// The output node is registered under the op's "Out" variable name with
// precision kBool. Returns REBUILD_WHEN_SHAPE_CHANGED.
int LessThanConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  VLOG(3) << "[NPU] Converting " + op_type + "...";

  // Get input and output names. Fail with a clear message if an input tensor
  // is missing from the scope instead of dereferencing a null pointer below.
  auto x_name = op_info->Input("X").front();
  auto x = scope->FindTensor(x_name);
  CHECK(x != nullptr) << "[NPU] input X tensor not found: " << x_name;
  auto y_name = op_info->Input("Y").front();
  auto y = scope->FindTensor(y_name);
  CHECK(y != nullptr) << "[NPU] input Y tensor not found: " << y_name;
  auto out_name = op_info->Output("Out").front();

  // X node: reuse the node if the graph already has it, otherwise create it
  // from the tensor found in the scope.
  std::shared_ptr<Node> x_node = nullptr;
  if (graph->Has(x_name)) {
    x_node = graph->Get(x_name);
  } else {
    x_node = graph->Add(x_name, *x);
  }

  // Y node: same lookup-or-create logic as X.
  std::shared_ptr<Node> y_node = nullptr;
  if (graph->Has(y_name)) {
    y_node = graph->Get(y_name);
  } else {
    y_node = graph->Add(y_name, *y);
  }

  // Less node: Out = (X < Y), produced as a bool tensor.
  auto less_than_node = graph->Add<ge::op::Less>(out_name, PRECISION(kBool));
  auto less_than_op = less_than_node->data<ge::op::Less>();
  less_than_op->set_input_x1(*x_node->data());
  less_than_op->set_input_x2(*y_node->data());
  return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace npu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(less_than,
kNPU,
paddle::lite::subgraph::npu::LessThanConverter);
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/npu/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/utility.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace npu {
int IncrementConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[NPU] Converting " + op_type + "...";
// Get input, output and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindTensor(x_name);
auto x_dims = x->dims();
auto out_name = op_info->Output("Out").front();
float step = op_info->GetAttr<float>("step");
// X node
std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) {
x_node = graph->Get(x_name);
} else {
x_node = graph->Add(x_name, *x, CvtShape(x_dims));
}
// Y node
Tensor y;
y.Resize({1});
auto y_data = y.mutable_data<float>();
y_data[0] = step;
y.set_persistable(true);
auto y_node = graph->Add(out_name + "/y", y);
// add node
auto increment_node = graph->Add<ge::op::Add>(out_name);
auto increment_op = increment_node->data<ge::op::Add>();
increment_op->set_input_x1(*x_node->data());
increment_op->set_input_x2(*y_node->data());
return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace npu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(increment,
kNPU,
paddle::lite::subgraph::npu::IncrementConverter);
......@@ -23,6 +23,7 @@ USE_SUBGRAPH_BRIDGE(softsign, kNPU);
USE_SUBGRAPH_BRIDGE(hard_sigmoid, kNPU);
USE_SUBGRAPH_BRIDGE(batch_norm, kNPU);
USE_SUBGRAPH_BRIDGE(less_than, kNPU);
USE_SUBGRAPH_BRIDGE(concat, kNPU);
USE_SUBGRAPH_BRIDGE(conv2d, kNPU);
USE_SUBGRAPH_BRIDGE(depthwise_conv2d, kNPU);
......@@ -40,9 +41,12 @@ USE_SUBGRAPH_BRIDGE(fusion_elementwise_div_activation, kNPU);
USE_SUBGRAPH_BRIDGE(fill_constant, kNPU)
USE_SUBGRAPH_BRIDGE(fill_constant_batch_size_like, kNPU)
USE_SUBGRAPH_BRIDGE(increment, kNPU);
USE_SUBGRAPH_BRIDGE(instance_norm, kNPU);
USE_SUBGRAPH_BRIDGE(fc, kNPU);
USE_SUBGRAPH_BRIDGE(bilinear_interp, kNPU);
USE_SUBGRAPH_BRIDGE(nearest_interp, kNPU);
USE_SUBGRAPH_BRIDGE(layer_norm, kNPU);
USE_SUBGRAPH_BRIDGE(matmul, kNPU);
USE_SUBGRAPH_BRIDGE(mul, kNPU);
USE_SUBGRAPH_BRIDGE(pad2d, kNPU);
......@@ -60,5 +64,3 @@ USE_SUBGRAPH_BRIDGE(transpose, kNPU);
USE_SUBGRAPH_BRIDGE(transpose2, kNPU);
USE_SUBGRAPH_BRIDGE(unsqueeze, kNPU);
USE_SUBGRAPH_BRIDGE(unsqueeze2, kNPU);
USE_SUBGRAPH_BRIDGE(instance_norm, kNPU);
USE_SUBGRAPH_BRIDGE(layer_norm, kNPU);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/scale_op.h"
#include <gtest/gtest.h>
#include <random>
#include "lite/core/op_registry.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/test_helper.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace npu {
namespace bridges {
// Reference (host-side) implementation of the scale op, used to validate the
// NPU result. Reads tensor "X" and the attributes `scale`, `bias` and
// `bias_after_scale` from the op, and writes the result into tensor "Out":
//   bias_after_scale == true :  Out = X * scale + bias
//   bias_after_scale == false:  Out = X * scale + bias * scale
// X and Out must hold the same number of elements (enforced with CHECK_EQ).
void scale_ref(const std::shared_ptr<operators::ScaleOp> op) {
  Scope* scope = op->scope();
  const OpInfo* info = op->op_info();
  auto input = scope->FindVar(info->Input("X").front())->GetMutable<Tensor>();
  auto output =
      scope->FindVar(info->Output("Out").front())->GetMutable<Tensor>();

  float scale = info->GetAttr<float>("scale");
  float offset = info->GetAttr<float>("bias");
  const bool bias_after_scale = info->GetAttr<bool>("bias_after_scale");
  // Folding the scale into the bias lets both modes share one formula below.
  if (!bias_after_scale) {
    offset = offset * scale;
  }

  auto src = input->data<float>();
  auto dst = output->mutable_data<float>();
  DDim in_shape = input->dims();
  DDim out_shape = output->dims();
  CHECK_EQ(in_shape.production(), out_shape.production());
  for (int idx = 0; idx < out_shape.production(); ++idx) {
    dst[idx] = src[idx] * scale + offset;
  }
}
void test_scale(int bs,
int ic,
int ih,
int iw,
bool bias_after_scale,
float scale,
float bias) {
// prepare input&output variables
Scope scope;
std::string x_var_name("x");
std::string out_var_name("out");
std::string out_ref_var_name("out_ref");
auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
x->Resize({bs, ic, ih, iw});
// initialize input&output data
FillTensor<float, int>(x);
// initialize op desc
cpp::OpDesc opdesc;
opdesc.SetType("scale");
opdesc.SetInput("X", {x_var_name});
opdesc.SetOutput("Out", {out_var_name});
opdesc.SetAttr("bias_after_scale", bias_after_scale);
opdesc.SetAttr("scale", scale);
opdesc.SetAttr("bias", bias);
// create and convert op to NPU model, then run it on NPU
auto op = CreateOp<operators::ScaleOp>(opdesc, &scope);
LauchOp(op, {x_var_name}, {out_var_name});
out_ref->CopyDataFrom(*out);
// execute reference implementation and save to output tensor('out')
scale_ref(op);
// compare results
auto* out_data = out->mutable_data<float>();
auto* out_ref_data = out_ref->mutable_data<float>();
for (int i = 0; i < out->dims().production(); i++) {
VLOG(5) << i;
EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5);
}
}
// Sweeps test_scale() over every combination of the listed input dimensions,
// both bias_after_scale modes, and two scale and two bias values.
TEST(NPUBridges, scale) {
  for (auto batch : {1, 3}) {
    for (auto channels : {1, 3}) {
      for (auto height : {3, 4}) {
        for (auto width : {4, 3}) {
          for (auto after_scale : {true, false}) {
            for (auto scale_value : {-1.0f, 5.0f}) {
              for (auto bias_value : {-2.0f, 30.0f}) {
                // Log the case first so a failure can be tied to its inputs.
                VLOG(3) << "bs: " << batch << " ic: " << channels
                        << " ih: " << height << " iw: " << width
                        << " bias_after_scale: " << after_scale
                        << " scale: " << scale_value
                        << " bias: " << bias_value;
                test_scale(batch,
                           channels,
                           height,
                           width,
                           after_scale,
                           scale_value,
                           bias_value);
              }
            }
          }
        }
      }
    }
  }
}
} // namespace bridges
} // namespace npu
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_OP(scale);
USE_NPU_BRIDGE(scale);
......@@ -149,6 +149,9 @@ int SubgraphEngine::BuildDeviceProgram() {
case PRECISION(kFloat):
origin_otensors_[i]->mutable_data<float>();
break;
case PRECISION(kBool):
origin_otensors_[i]->mutable_data<bool>();
break;
case PRECISION(kInt8):
origin_otensors_[i]->mutable_data<int8_t>();
break;
......@@ -231,10 +234,12 @@ void SubgraphCompute::Run() {
REGISTER_LITE_KERNEL(subgraph,
kNPU,
kFloat,
kAny,
kNCHW,
paddle::lite::kernels::npu::SubgraphCompute,
def)
.BindInput("Inputs", {LiteType::GetTensorTy(TARGET(kHost))})
.BindOutput("Outputs", {LiteType::GetTensorTy(TARGET(kHost))})
.BindInput("Inputs",
{LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny))})
.BindOutput("Outputs",
{LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny))})
.Finalize();
......@@ -51,7 +51,7 @@ class SubgraphEngine : public subgraph::Engine {
std::unique_ptr<hiai::AiModelMngerClient> device_program_{nullptr};
};
class SubgraphCompute : public KernelLite<TARGET(kNPU), PRECISION(kFloat)> {
class SubgraphCompute : public KernelLite<TARGET(kNPU), PRECISION(kAny)> {
public:
using param_t = operators::SubgraphParam;
......
......@@ -19,7 +19,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM) AND (LITE_
lite_cc_test(test_kernel_grid_sampler_compute SRCS grid_sampler_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_sequence_softmax_compute SRCS sequence_softmax_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_im2sequence_compute SRCS im2sequence_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_compare_compute SRCS compare_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_compare_compute SRCS compare_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_logical_xor_compute SRCS logical_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_topk_compute SRCS topk_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_increment_compute SRCS increment_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
......
......@@ -69,7 +69,7 @@ void TestAssign(const Place& place) {
TEST(Assign, precision) {
Place place;
#ifdef LITE_WITH_ARM
place = {TARGET(kARM), PRECISION(kAny)};
place = TARGET(kARM);
#else
return;
#endif
......
......@@ -97,7 +97,7 @@ class AssignValueComputeTester : public arena::TestCase {
TEST(AssignValue, precision) {
Place place;
#ifdef LITE_WITH_ARM
place = {TARGET(kARM), PRECISION(kAny)};
place = TARGET(kARM);
#else
return;
#endif
......
......@@ -134,7 +134,7 @@ TEST(Cast, precision) {
Place place;
float abs_error = 2e-5;
#if defined(LITE_WITH_ARM)
place = {TARGET(kARM), PRECISION(kAny)};
place = TARGET(kARM);
#elif defined(LITE_WITH_XPU)
place = TARGET(kXPU);
#else
......
......@@ -16,12 +16,14 @@
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/core/arena/framework.h"
#include "lite/tests/utils/fill_data.h"
namespace paddle {
namespace lite {
#define COMPARE_FUNCTOR(name, op) \
template <typename T> \
struct _##name##Functor { \
struct name##Functor { \
inline bool operator()(const T& a, const T& b) const { return a op b; } \
};
......@@ -33,7 +35,7 @@ COMPARE_FUNCTOR(GreaterThan, >);
COMPARE_FUNCTOR(GreaterEqual, >=);
template <>
struct _EqualFunctor<float> {
struct EqualFunctor<float> {
inline bool operator()(const float& a, const float& b) const {
// It is safe to cast a and b to double.
return fabs(static_cast<double>(a - b)) < 1e-8;
......@@ -41,59 +43,56 @@ struct _EqualFunctor<float> {
};
template <>
struct _NotEqualFunctor<float> {
struct NotEqualFunctor<float> {
inline bool operator()(const float& a, const float& b) const {
return !_EqualFunctor<float>()(a, b);
return !EqualFunctor<float>()(a, b);
}
};
template <template <typename T> class Functor>
class LessThanTester : public arena::TestCase {
template <typename T, template <typename U> class Functor>
class CompareComputeTester : public arena::TestCase {
protected:
std::string input_x_ = "x";
std::string input_y_ = "y";
std::string output_ = "out";
int axis_ = 1;
bool force_cpu_ = 0;
std::string x_ = "x";
std::string y_ = "y";
std::string out_ = "out";
std::string op_ = "less_than";
DDim x_dims_{{3, 5, 4, 4}};
DDim y_dims_{{4}};
std::string opname_ = "less_than";
int axis_ = -1;
bool force_cpu_ = false;
public:
LessThanTester(const Place& place,
CompareComputeTester(const Place& place,
const std::string& alias,
bool force_cpu,
int axis,
const std::string op,
DDim x_dims,
DDim y_dims,
const std::string& opname)
int axis = -1)
: TestCase(place, alias),
axis_(axis),
force_cpu_(force_cpu),
op_(op),
x_dims_(x_dims),
y_dims_(y_dims),
opname_(opname) {}
axis_(axis) {}
void RunBaseline(Scope* scope) override {
auto* out = scope->NewTensor(output_);
auto* out = scope->NewTensor(out_);
CHECK(out);
out->Resize(x_dims_);
auto* out_data = out->mutable_data<bool>();
auto axis = axis_;
auto* x = scope->FindTensor(input_x_);
const auto* x_data = x->data<float>();
auto* y = scope->FindTensor(input_y_);
auto* y_data_in = y->data<float>();
auto* x = scope->FindTensor(x_);
const auto* x_data = x->data<T>();
auto* y = scope->FindTensor(y_);
auto* y_data_in = y->data<T>();
using CompareFunc = Functor<float>;
using CompareFunc = Functor<T>;
if (x_dims_.size() == y_dims_.size()) {
for (int i = 0; i < x_dims_.production(); i++) {
// out_data[i] = x_data[i] < y_data[i];
out_data[i] = CompareFunc()(x_data[i], y_data_in[i]);
}
} else {
auto* y_data = reinterpret_cast<float*>(
malloc(x_dims_.production() * sizeof(float)));
auto* y_data =
reinterpret_cast<T*>(malloc(x_dims_.production() * sizeof(T)));
if (axis < 0) {
axis = x_dims_.size() - y_dims_.size();
......@@ -111,12 +110,12 @@ class LessThanTester : public arena::TestCase {
num *= x_dims_[i];
}
int ysize = channels * num;
float* y_data_t = reinterpret_cast<float*>(y_data);
T* y_data_t = reinterpret_cast<T*>(y_data);
if (num == 1) {
for (int i = 0; i < batch; ++i) {
memcpy(reinterpret_cast<void*>(y_data_t),
reinterpret_cast<const void*>(&y_data_in[0]),
ysize * sizeof(float));
ysize * sizeof(T));
y_data_t += ysize;
}
......@@ -126,118 +125,118 @@ class LessThanTester : public arena::TestCase {
y_data_t[i * num + j] = y_data_in[i];
}
}
float* tempptr = y_data_t;
T* tempptr = y_data_t;
for (int i = 0; i < batch; ++i) {
memcpy(y_data_t, tempptr, ysize * sizeof(float));
memcpy(y_data_t, tempptr, ysize * sizeof(T));
y_data_t += ysize;
}
}
for (int i = 0; i < x_dims_.production(); i++) {
// out_data[i] = x_data[i] < y_data[i];
out_data[i] = CompareFunc()(x_data[i], y_data[i]);
}
}
}
void PrepareOpDesc(cpp::OpDesc* op_desc) {
op_desc->SetType(opname_);
op_desc->SetInput("X", {input_x_});
op_desc->SetInput("Y", {input_y_});
op_desc->SetOutput("Out", {output_});
op_desc->SetType(op_);
op_desc->SetInput("X", {x_});
op_desc->SetInput("Y", {y_});
op_desc->SetOutput("Out", {out_});
op_desc->SetAttr("axis", axis_);
op_desc->SetAttr("force_cpu", force_cpu_);
}
void PrepareData() override {
std::vector<float> data(x_dims_.production());
std::vector<float> datay(
y_dims_.production()); // datay(dims_.production());
for (int i = 0; i < x_dims_.production(); i++) {
data[i] = 1.1;
}
for (int i = 0; i < y_dims_.production(); i++) {
datay[i] = i;
}
SetCommonTensor(input_x_, x_dims_, data.data());
SetCommonTensor(input_y_, y_dims_, datay.data());
std::vector<T> dx(x_dims_.production());
std::vector<T> dy(y_dims_.production());
fill_data_rand<T>(dx.data(), -5, 5, x_dims_.production());
fill_data_rand<T>(dy.data(), -5, 5, y_dims_.production());
SetCommonTensor(x_, x_dims_, dx.data());
SetCommonTensor(y_, y_dims_, dy.data());
}
};
void test_compare(Place place) {
for (bool force_cpu : {0}) {
for (auto n : {1, 3, 4}) {
for (auto c : {1, 3, 4}) {
for (auto h : {1, 3, 4}) {
for (auto w : {1, 3, 4}) {
for (auto axis : {-1, 0, 1, 3}) {
for (auto yd : {std::vector<int64_t>({n}),
std::vector<int64_t>({c}),
std::vector<int64_t>({h}),
std::vector<int64_t>({w}),
std::vector<int64_t>({n, c}),
std::vector<int64_t>({h, w}),
std::vector<int64_t>({n, c, h}),
std::vector<int64_t>({n, c, h, w})}) {
DDimLite x_dims = DDim(std::vector<int64_t>({n, c, h, w}));
DDimLite y_dims = DDim(yd);
int axis_t = axis < 0 ? x_dims.size() - y_dims.size() : axis;
if (axis_t + y_dims.size() > 4) continue;
bool flag = false;
for (int i = 0; i < y_dims.size(); i++) {
if (x_dims[i + axis_t] != y_dims[i]) flag = true;
}
if (flag) continue;
std::unique_ptr<arena::TestCase> less_than_tester(
new LessThanTester<paddle::lite::_LessThanFunctor>(
place,
"def",
force_cpu,
axis,
x_dims,
y_dims,
"less_than"));
arena::Arena less_than_arena(
std::move(less_than_tester), place, 0.001);
less_than_arena.TestPrecision();
std::unique_ptr<arena::TestCase> equal_tester(
new LessThanTester<paddle::lite::_EqualFunctor>(place,
"def",
force_cpu,
axis,
x_dims,
y_dims,
"equal"));
arena::Arena equal_arena(std::move(equal_tester), place, 0.001);
equal_arena.TestPrecision();
std::unique_ptr<arena::TestCase> greater_than_tester(
new LessThanTester<paddle::lite::_GreaterThanFunctor>(
place,
"def",
force_cpu,
axis,
x_dims,
y_dims,
"greater_than"));
arena::Arena greater_than_arena(
std::move(greater_than_tester), place, 0.001);
greater_than_arena.TestPrecision();
}
}
}
}
// Builds and runs one arena precision test for a compare op.
//
// T:         element type of the inputs; float, int32_t and int64_t are
//            accepted, anything else hits LOG(FATAL).
// place:     target place; its precision field is overwritten to match T.
// abs_error: tolerance handed to arena::Arena.
// op:        op type; one of "equal", "not_equal", "less_than", "less_equal",
//            "greater_than", "greater_equal". It selects the reference functor
//            and is also passed on to the tester. Other values hit LOG(FATAL).
// x_dims, y_dims: shapes of the two inputs.
// axis:      passed through to the tester.
template <typename T>
void TestCompare(Place place,
                 float abs_error,
                 std::string op,
                 std::vector<int64_t> x_dims,
                 std::vector<int64_t> y_dims,
                 int axis) {
  // Map the element type to the matching kernel precision.
  const auto& dtype = typeid(T);
  if (dtype == typeid(float)) {
    place.precision = PRECISION(kFloat);
  } else if (dtype == typeid(int32_t)) {
    place.precision = PRECISION(kInt32);
  } else if (dtype == typeid(int64_t)) {
    place.precision = PRECISION(kInt64);
  } else {
    LOG(FATAL) << "unsupported dtype";
  }

  // Instantiate the tester with the reference functor that matches the op.
  std::unique_ptr<arena::TestCase> tester;
  if (op == "equal") {
    tester.reset(new CompareComputeTester<T, EqualFunctor>(
        place, "def", op, DDim(x_dims), DDim(y_dims), axis));
  } else if (op == "not_equal") {
    tester.reset(new CompareComputeTester<T, NotEqualFunctor>(
        place, "def", op, DDim(x_dims), DDim(y_dims), axis));
  } else if (op == "less_than") {
    tester.reset(new CompareComputeTester<T, LessThanFunctor>(
        place, "def", op, DDim(x_dims), DDim(y_dims), axis));
  } else if (op == "less_equal") {
    tester.reset(new CompareComputeTester<T, LessEqualFunctor>(
        place, "def", op, DDim(x_dims), DDim(y_dims), axis));
  } else if (op == "greater_than") {
    tester.reset(new CompareComputeTester<T, GreaterThanFunctor>(
        place, "def", op, DDim(x_dims), DDim(y_dims), axis));
  } else if (op == "greater_equal") {
    tester.reset(new CompareComputeTester<T, GreaterEqualFunctor>(
        place, "def", op, DDim(x_dims), DDim(y_dims), axis));
  } else {
    LOG(FATAL) << "unsupported type";
  }

  arena::Arena arena(std::move(tester), place, abs_error);
  arena.TestPrecision();
}
#if defined(LITE_WITH_NPU)
// NPU precision test: runs float `less_than` on a 4-D and a 3-D pair of
// same-shaped inputs with an absolute tolerance of 1e-2.
TEST(Compare_OP_NPU, precision) {
  Place npu_place{TARGET(kNPU)};
  float tolerance = 1e-2;
  // 4-D inputs, axis = -1.
  TestCompare<float>(
      npu_place, tolerance, "less_than", {2, 3, 4, 5}, {2, 3, 4, 5}, -1);
  // 3-D inputs, axis = 0.
  TestCompare<float>(
      npu_place, tolerance, "less_than", {2, 3, 4}, {2, 3, 4}, 0);
}
#elif defined(LITE_WITH_ARM)
// ARM precision test with an absolute tolerance of 1e-5. Covers:
//  - all six compare ops on float inputs of identical shape (4-D and 3-D);
//  - float `equal` where y's shape is a sub-range of x's dims starting at
//    `axis`;
//  - `less_than` on int32 and int64 inputs of identical shape.
TEST(Compare_OP_ARM, precision) {
  Place arm_place{TARGET(kARM)};
  float tolerance = 1e-5;

  // Same-shape float inputs for every compare op.
  const std::vector<std::string> compare_ops{"equal",
                                             "not_equal",
                                             "less_than",
                                             "less_equal",
                                             "greater_than",
                                             "greater_equal"};
  for (const auto& name : compare_ops) {
    TestCompare<float>(
        arm_place, tolerance, name, {2, 3, 4, 5}, {2, 3, 4, 5}, -1);
    TestCompare<float>(arm_place, tolerance, name, {2, 3, 4}, {2, 3, 4}, 0);
  }

  // y has fewer dims than x and is aligned at the given axis.
  TestCompare<float>(arm_place, tolerance, "equal", {2, 3, 4}, {3, 4}, 1);
  TestCompare<float>(arm_place, tolerance, "equal", {2, 3, 4, 5}, {3, 4}, 1);
  TestCompare<float>(arm_place, tolerance, "equal", {2, 3, 4}, {4}, 2);
  TestCompare<float>(arm_place, tolerance, "equal", {2, 3, 4, 5}, {5}, 3);

  // Integer element types.
  TestCompare<int32_t>(arm_place, tolerance, "less_than", {3, 4}, {3, 4}, -1);
  TestCompare<int64_t>(arm_place, tolerance, "less_than", {3, 4}, {3, 4}, -1);
}
TEST(Compare_OP, precision) {
// #ifdef LITE_WITH_X86
// // Place place(TARGET(kX86));
// // #endif
#ifdef LITE_WITH_ARM
Place place(TARGET(kARM));
test_compare(place);
#endif
}
} // namespace lite
} // namespace paddle
......@@ -136,7 +136,7 @@ TEST(fill_constant_batch_size_like, precision) {
place = TARGET(kNPU);
abs_error = 1e-2; // use fp16 in npu
#elif defined(LITE_WITH_ARM)
place = {TARGET(kARM), PRECISION(kAny)};
place = TARGET(kARM);
#else
return;
#endif
......
......@@ -174,7 +174,7 @@ TEST(fill_constant, precision) {
place = TARGET(kNPU);
abs_error = 1e-2; // use fp16 in npu
#elif defined(LITE_WITH_ARM)
place = {TARGET(kARM), PRECISION(kAny)};
place = TARGET(kARM);
#else
return;
#endif
......
......@@ -95,7 +95,7 @@ TEST(Gather, precision) {
float abs_error = 2e-5;
Place place;
#if defined(LITE_WITH_ARM)
place = {TARGET(kARM), PRECISION(kAny)};
place = TARGET(kARM);
#elif defined(LITE_WITH_XPU)
place = TARGET(kXPU);
#else
......
......@@ -66,12 +66,14 @@ class IncrementComputeTester : public arena::TestCase {
};
void test_increment(Place place, float abs_error) {
DDimLite dims_0{{3, 5, 4, 4}};
DDimLite dims_1{{3, 5}};
for (auto dims : {dims_0, dims_1}) {
std::vector<std::vector<int64_t>> x_dims{{3, 5, 4, 4}, {3, 5}, {1}};
for (auto dims : x_dims) {
for (float step : {1, 2}) {
#if LITE_WITH_NPU
if (dims.size() != 1) continue;
#endif
std::unique_ptr<arena::TestCase> tester(
new IncrementComputeTester(place, "def", step, dims));
new IncrementComputeTester(place, "def", step, DDim(dims)));
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
......@@ -81,8 +83,11 @@ void test_increment(Place place, float abs_error) {
TEST(Increment, precision) {
Place place;
float abs_error = 2e-5;
#if defined(LITE_WITH_ARM)
place = {TARGET(kARM), PRECISION(kAny)};
#if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2; // use fp16 in npu
#elif defined(LITE_WITH_ARM)
place = TARGET(kARM);
#else
return;
#endif
......
......@@ -112,7 +112,7 @@ TEST(LookupTable, precision) {
float abs_error = 2e-5;
Place place;
#if defined(LITE_WITH_ARM)
place = {TARGET(kARM), PRECISION(kAny)};
place = TARGET(kARM);
#elif defined(LITE_WITH_XPU)
place = TARGET(kXPU);
#else
......
......@@ -129,7 +129,7 @@ class LookupTableDequantComputeTest : public arena::TestCase {
TEST(LookupTableDequant, precision) {
#ifdef LITE_WITH_ARM
float abs_error = 2e-5;
Place place = {TARGET(kARM), PRECISION(kAny)};
Place place = TARGET(kARM);
for (auto ids_dims :
std::vector<std::vector<int64_t>>{{5, 2, 3, 1}, {2, 3, 1}, {3, 1}}) {
for (auto w_dims :
......
......@@ -88,7 +88,7 @@ TEST(ReadFromArray, precision) {
Place place;
float abs_error = 1e-5;
#ifdef LITE_WITH_ARM
place = {TARGET(kARM), PRECISION(kAny)};
place = TARGET(kARM);
#else
return;
#endif
......
......@@ -85,7 +85,7 @@ TEST(WriteToArray, precision) {
Place place;
float abs_error = 1e-5;
#ifdef LITE_WITH_ARM
place = {TARGET(kARM), PRECISION(kAny)};
place = TARGET(kARM);
#else
return;
#endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册