Unverified commit dcf6acce, authored by zhupengyang, committed by GitHub

[NPU] add shape, gather, lookup_table bridge (#3197)

* [NPU] add shape bridge

move shape arm kernel to host

* enhance compare arm kernel

* [NPU] add gather op bridge

* enable reshape arm ut

* [NPU] add lookup_table bridge
Parent ae3ebea5
......@@ -292,13 +292,10 @@ void Predictor::Build(const cpp::ProgramDesc &desc,
program_desc_ = desc;
// `inner_places` is used to optimize passes
std::vector<Place> inner_places = valid_places;
inner_places.emplace_back(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny));
inner_places.emplace_back(
TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW));
inner_places.emplace_back(
TARGET(kHost), PRECISION(kInt32), DATALAYOUT(kNCHW));
inner_places.emplace_back(
TARGET(kHost), PRECISION(kInt64), DATALAYOUT(kNCHW));
for (auto &valid_place : valid_places) {
inner_places.emplace_back(
Place(TARGET(kHost), valid_place.precision, valid_place.layout));
}
// Analyze whether the model is quantized.
// For quantized model, add place(arm, int8) to inner_places
......
......@@ -151,16 +151,30 @@ KernelRegistry::KernelRegistry()
INIT_FOR(kMLU, kInt16, kNHWC);
INIT_FOR(kMLU, kInt16, kNCHW);
INIT_FOR(kHost, kFloat, kNCHW);
INIT_FOR(kHost, kInt32, kNCHW);
INIT_FOR(kHost, kInt64, kNCHW);
INIT_FOR(kHost, kAny, kNCHW);
INIT_FOR(kHost, kFloat, kNHWC);
INIT_FOR(kHost, kFloat, kAny);
INIT_FOR(kHost, kAny, kNHWC);
INIT_FOR(kHost, kAny, kAny);
INIT_FOR(kHost, kAny, kNHWC);
INIT_FOR(kHost, kAny, kAny);
INIT_FOR(kHost, kBool, kNCHW);
INIT_FOR(kHost, kBool, kNHWC);
INIT_FOR(kHost, kBool, kAny);
INIT_FOR(kHost, kFloat, kNCHW);
INIT_FOR(kHost, kFloat, kNHWC);
INIT_FOR(kHost, kFloat, kAny);
INIT_FOR(kHost, kFP16, kNCHW);
INIT_FOR(kHost, kFP16, kNHWC);
INIT_FOR(kHost, kFP16, kAny);
INIT_FOR(kHost, kInt8, kNCHW);
INIT_FOR(kHost, kInt8, kNHWC);
INIT_FOR(kHost, kInt8, kAny);
INIT_FOR(kHost, kInt16, kNCHW);
INIT_FOR(kHost, kInt16, kNHWC);
INIT_FOR(kHost, kInt16, kAny);
INIT_FOR(kHost, kInt32, kNCHW);
INIT_FOR(kHost, kInt32, kNHWC);
INIT_FOR(kHost, kInt32, kAny);
INIT_FOR(kHost, kInt64, kNCHW);
INIT_FOR(kHost, kInt64, kNHWC);
INIT_FOR(kHost, kInt64, kAny);
INIT_FOR(kX86, kFloat, kNCHW);
INIT_FOR(kX86, kAny, kNCHW);
......
......@@ -63,7 +63,6 @@ add_kernel(lrn_compute_arm ARM extra SRCS lrn_compute.cc DEPS ${lite_kernel_deps
add_kernel(decode_bboxes_compute_arm ARM extra SRCS decode_bboxes_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(density_prior_box_compute_arm ARM basic SRCS density_prior_box_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(axpy_compute_arm ARM extra SRCS axpy_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(shape_compute_arm ARM extra SRCS shape_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(reduce_max_compute_arm ARM extra SRCS reduce_max_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_expand_compute_arm ARM extra SRCS sequence_expand_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(im2sequence_compute_arm ARM extra SRCS im2sequence_compute.cc DEPS ${lite_kernel_deps} math_arm)
......@@ -92,7 +91,6 @@ add_kernel(lookup_table_dequant_compute_arm ARM extra SRCS lookup_table_dequant_
add_kernel(logical_compute_arm ARM extra SRCS logical_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_softmax_compute_arm ARM extra SRCS sequence_softmax_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(while_compute_arm ARM extra SRCS while_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(compare_compute_arm ARM extra SRCS compare_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(topk_compute_arm ARM extra SRCS topk_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(increment_compute_arm ARM extra SRCS increment_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(write_to_array_compute_arm ARM extra SRCS write_to_array_compute.cc DEPS ${lite_kernel_deps} math_arm)
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/arm/compare_compute.h"
#include <vector>
#include "lite/api/paddle_place.h"
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
#define COMPARE_FUNCTOR(name, op) \
template <typename T> \
struct _##name##Functor { \
inline bool operator()(const T &a, const T &b) const { return a op b; } \
};
COMPARE_FUNCTOR(Equal, ==);
COMPARE_FUNCTOR(NotEqual, !=);
COMPARE_FUNCTOR(LessThan, <);
COMPARE_FUNCTOR(LessEqual, <=);
COMPARE_FUNCTOR(GreaterThan, >);
COMPARE_FUNCTOR(GreaterEqual, >=);
template <>
struct _EqualFunctor<float> {
inline bool operator()(const float &a, const float &b) const {
// It is safe to cast a and b to double.
return fabs(static_cast<double>(a - b)) < 1e-8;
}
};
template <>
struct _NotEqualFunctor<float> {
inline bool operator()(const float &a, const float &b) const {
return !_EqualFunctor<float>()(a, b);
}
};
inline void get_mid_dims(const lite::DDim &x_dims,
const lite::DDim &y_dims,
const int axis,
int *pre,
int *n,
int *post) {
*pre = 1;
*n = 1;
*post = 1;
for (int i = 0; i < axis; ++i) {
(*pre) *= x_dims[i];
}
for (int i = 0; i < y_dims.size(); ++i) {
(*n) *= y_dims[i];
}
for (int i = axis + y_dims.size(); i < x_dims.size(); ++i) {
(*post) *= x_dims[i];
}
}
template <template <typename T> class Functor>
void CompareCompute<Functor>::Run() {
auto &param = this->Param<operators::CompareParam>();
using CompareFunctor = Functor<float>;
const size_t x_size = param.X->numel();
const size_t y_size = param.Y->numel();
auto x_dims = param.X->dims();
auto y_dims = param.Y->dims();
bool *z = param.Out->template mutable_data<bool>();
const auto *x = param.X->template data<float>();
const auto *y = param.Y->template data<float>();
auto axis = param.axis;
bool force_cpu = param.force_cpu;
if (x_size == y_size) {
for (int i = 0; i < x_size; ++i) {
z[i] = CompareFunctor()(x[i], y[i]);
}
} else {
int axis = (param.axis == -1 ? x_dims.size() - y_dims.size() : param.axis);
int outer_num, mid_num, inner_num;
get_mid_dims(x_dims, y_dims, axis, &outer_num, &mid_num, &inner_num);
for (int outer_id = 0; outer_id < outer_num; ++outer_id) {
for (int mid_id = 0; mid_id < mid_num; ++mid_id) {
auto y_data = y[mid_id];
for (int inner_id = 0; inner_id < inner_num; ++inner_id) {
int index = (outer_id * mid_num + mid_id) * inner_num + inner_id;
z[index] = CompareFunctor()(x[index], y_data);
// z[index] = x[index] < y_data;
}
}
}
}
}
template <template <typename T> class Functor>
void CompareCompute_int32<Functor>::Run() {
auto &param = this->Param<operators::CompareParam>();
using CompareFunctor = Functor<int>;
const size_t x_size = param.X->numel();
const size_t y_size = param.Y->numel();
auto x_dims = param.X->dims();
auto y_dims = param.Y->dims();
bool *z = param.Out->template mutable_data<bool>();
const auto *x = param.X->template data<int>();
const auto *y = param.Y->template data<int>();
auto axis = param.axis;
bool force_cpu = param.force_cpu;
if (x_size == y_size) {
for (int i = 0; i < x_size; ++i) {
z[i] = CompareFunctor()(x[i], y[i]);
}
} else {
int axis = (param.axis == -1 ? x_dims.size() - y_dims.size() : param.axis);
int outer_num, mid_num, inner_num;
get_mid_dims(x_dims, y_dims, axis, &outer_num, &mid_num, &inner_num);
for (int outer_id = 0; outer_id < outer_num; ++outer_id) {
for (int mid_id = 0; mid_id < mid_num; ++mid_id) {
auto y_data = y[mid_id];
for (int inner_id = 0; inner_id < inner_num; ++inner_id) {
int index = (outer_id * mid_num + mid_id) * inner_num + inner_id;
z[index] = CompareFunctor()(x[index], y_data);
// z[index] = x[index] < y_data;
}
}
}
}
}
template <template <typename T> class Functor>
void CompareCompute_int64<Functor>::Run() {
auto &param = this->Param<operators::CompareParam>();
using CompareFunctor = Functor<int64_t>;
const size_t x_size = param.X->numel();
const size_t y_size = param.Y->numel();
auto x_dims = param.X->dims();
auto y_dims = param.Y->dims();
bool *z = param.Out->template mutable_data<bool>();
const auto *x = param.X->template data<int64_t>();
const auto *y = param.Y->template data<int64_t>();
auto axis = param.axis;
bool force_cpu = param.force_cpu;
if (x_size == y_size) {
for (int i = 0; i < x_size; ++i) {
z[i] = CompareFunctor()(x[i], y[i]);
}
} else {
int axis = (param.axis == -1 ? x_dims.size() - y_dims.size() : param.axis);
int outer_num, mid_num, inner_num;
get_mid_dims(x_dims, y_dims, axis, &outer_num, &mid_num, &inner_num);
for (int outer_id = 0; outer_id < outer_num; ++outer_id) {
for (int mid_id = 0; mid_id < mid_num; ++mid_id) {
auto y_data = y[mid_id];
for (int inner_id = 0; inner_id < inner_num; ++inner_id) {
int index = (outer_id * mid_num + mid_id) * inner_num + inner_id;
z[index] = CompareFunctor()(x[index], y_data);
}
}
}
}
}
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(equal,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::CompareCompute<
paddle::lite::kernels::arm::_EqualFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(equal,
kARM,
kInt32,
kNCHW,
paddle::lite::kernels::arm::CompareCompute_int32<
paddle::lite::kernels::arm::_EqualFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(not_equal,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::CompareCompute<
paddle::lite::kernels::arm::_NotEqualFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(less_than,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::CompareCompute<
paddle::lite::kernels::arm::_LessThanFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(less_than,
kARM,
kInt32,
kNCHW,
paddle::lite::kernels::arm::CompareCompute_int32<
paddle::lite::kernels::arm::_LessThanFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(less_than,
kARM,
kInt64,
kNCHW,
paddle::lite::kernels::arm::CompareCompute_int64<
paddle::lite::kernels::arm::_LessThanFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(less_equal,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::CompareCompute<
paddle::lite::kernels::arm::_LessEqualFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(greater_than,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::CompareCompute<
paddle::lite::kernels::arm::_GreaterThanFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
REGISTER_LITE_KERNEL(greater_equal,
kARM,
kFloat,
kNCHW,
paddle::lite::kernels::arm::CompareCompute<
paddle::lite::kernels::arm::_GreaterEqualFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.Finalize();
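The _EqualFunctor<float> specialization above replaces exact == with a 1e-8 tolerance on the double-widened difference. A standalone sketch of the same check, with illustrative values (not part of the kernel):

#include <cmath>
#include <iostream>

// Tolerance-based float equality, mirroring _EqualFunctor<float>.
bool nearly_equal(float a, float b) {
  return std::fabs(static_cast<double>(a) - static_cast<double>(b)) < 1e-8;
}

int main() {
  std::cout << std::boolalpha
            << nearly_equal(2.0f, 2.0f) << "\n"       // true: difference is 0
            << nearly_equal(1.0f, 1.00001f) << "\n";  // false: difference ~1e-5
  return 0;
}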
......@@ -2,7 +2,9 @@ message(STATUS "compile with lite host kernels")
add_kernel(feed_compute_host Host basic SRCS feed_compute.cc DEPS ${lite_kernel_deps})
add_kernel(fetch_compute_host Host basic SRCS fetch_compute.cc DEPS ${lite_kernel_deps})
add_kernel(reshape_compute_host Host basic SRCS reshape_compute.cc DEPS ${lite_kernel_deps} reshape_op)
add_kernel(reshape_compute_host Host basic SRCS reshape_compute.cc DEPS ${lite_kernel_deps})
add_kernel(multiclass_nms_compute_host Host basic SRCS multiclass_nms_compute.cc DEPS ${lite_kernel_deps})
add_kernel(shape_compute_host Host extra SRCS shape_compute.cc DEPS ${lite_kernel_deps})
add_kernel(crf_decoding_compute_host Host extra SRCS crf_decoding_compute.cc DEPS ${lite_kernel_deps})
add_kernel(compare_compute_host Host extra SRCS compare_compute.cc DEPS ${lite_kernel_deps})
add_kernel(ctc_align_compute_host Host extra SRCS ctc_align_compute.cc DEPS ${lite_kernel_deps})
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/host/compare_compute.h"
#include <vector>
namespace paddle {
namespace lite {
namespace kernels {
namespace host {
#define COMPARE_FUNCTOR(name, op) \
template <typename T> \
struct _##name##Functor { \
using TYPE = T; \
inline bool operator()(const T &a, const T &b) const { return a op b; } \
};
COMPARE_FUNCTOR(Equal, ==);
COMPARE_FUNCTOR(NotEqual, !=);
COMPARE_FUNCTOR(LessThan, <);
COMPARE_FUNCTOR(LessEqual, <=);
COMPARE_FUNCTOR(GreaterThan, >);
COMPARE_FUNCTOR(GreaterEqual, >=);
template <>
struct _EqualFunctor<float> {
using TYPE = float;
inline bool operator()(const float &a, const float &b) const {
// It is safe to cast a and b to double.
return fabs(static_cast<double>(a - b)) < 1e-8;
}
};
template <>
struct _NotEqualFunctor<float> {
using TYPE = float;
inline bool operator()(const float &a, const float &b) const {
return !_EqualFunctor<float>()(a, b);
}
};
inline void get_mid_dims(const lite::DDim &x_dims,
const lite::DDim &y_dims,
const int axis,
int *pre,
int *n,
int *post) {
*pre = 1;
*n = 1;
*post = 1;
for (int i = 0; i < axis; ++i) {
(*pre) *= x_dims[i];
}
for (int i = 0; i < y_dims.size(); ++i) {
(*n) *= y_dims[i];
}
for (int i = axis + y_dims.size(); i < x_dims.size(); ++i) {
(*post) *= x_dims[i];
}
}
template <PrecisionType PType, typename CompareFunctor>
void CompareCompute<PType, CompareFunctor>::Run() {
auto &param = this->template Param<operators::CompareParam>();
using DType = typename CompareFunctor::TYPE;
const size_t x_size = param.X->numel();
const size_t y_size = param.Y->numel();
auto x_dims = param.X->dims();
auto y_dims = param.Y->dims();
bool *z = param.Out->template mutable_data<bool>();
const auto *x = param.X->template data<DType>();
const auto *y = param.Y->template data<DType>();
if (x_size == y_size) {
for (int i = 0; i < x_size; ++i) {
z[i] = CompareFunctor()(x[i], y[i]);
}
} else {
int axis = (param.axis == -1 ? x_dims.size() - y_dims.size() : param.axis);
int outer_num, mid_num, inner_num;
get_mid_dims(x_dims, y_dims, axis, &outer_num, &mid_num, &inner_num);
for (int outer_id = 0; outer_id < outer_num; ++outer_id) {
for (int mid_id = 0; mid_id < mid_num; ++mid_id) {
auto y_data = y[mid_id];
for (int inner_id = 0; inner_id < inner_num; ++inner_id) {
int index = (outer_id * mid_num + mid_id) * inner_num + inner_id;
z[index] = CompareFunctor()(x[index], y_data);
}
}
}
}
}
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
using equal_float = paddle::lite::kernels::host::CompareCompute<
PRECISION(kFloat),
paddle::lite::kernels::host::_EqualFunctor<float>>;
REGISTER_LITE_KERNEL(equal, kHost, kFloat, kAny, equal_float, def)
.BindInput("X",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny), -1)})
.BindInput("Y",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny), -1)})
.BindOutput("Out",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kBool), DATALAYOUT(kAny), -1)})
.Finalize();
using equal_int32 = paddle::lite::kernels::host::CompareCompute<
PRECISION(kInt32),
paddle::lite::kernels::host::_EqualFunctor<int32_t>>;
REGISTER_LITE_KERNEL(equal, kHost, kInt32, kAny, equal_int32, def)
.BindInput("X",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kInt32), DATALAYOUT(kAny), -1)})
.BindInput("Y",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kInt32), DATALAYOUT(kAny), -1)})
.BindOutput("Out",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kBool), DATALAYOUT(kAny), -1)})
.Finalize();
using not_equal_float = paddle::lite::kernels::host::CompareCompute<
PRECISION(kFloat),
paddle::lite::kernels::host::_NotEqualFunctor<float>>;
REGISTER_LITE_KERNEL(not_equal, kHost, kFloat, kAny, not_equal_float, def)
.BindInput("X",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny), -1)})
.BindInput("Y",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny), -1)})
.BindOutput("Out",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kBool), DATALAYOUT(kAny), -1)})
.Finalize();
using less_than_float = paddle::lite::kernels::host::CompareCompute<
PRECISION(kFloat),
paddle::lite::kernels::host::_LessThanFunctor<float>>;
REGISTER_LITE_KERNEL(less_than, kHost, kFloat, kAny, less_than_float, def)
.BindInput("X",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny), -1)})
.BindInput("Y",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny), -1)})
.BindOutput("Out",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kBool), DATALAYOUT(kAny), -1)})
.Finalize();
using less_than_int32 = paddle::lite::kernels::host::CompareCompute<
PRECISION(kInt32),
paddle::lite::kernels::host::_LessThanFunctor<int32_t>>;
REGISTER_LITE_KERNEL(less_than, kHost, kInt32, kAny, less_than_int32, def)
.BindInput("X",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kInt32), DATALAYOUT(kAny), -1)})
.BindInput("Y",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kInt32), DATALAYOUT(kAny), -1)})
.BindOutput("Out",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kBool), DATALAYOUT(kAny), -1)})
.Finalize();
using less_than_int64 = paddle::lite::kernels::host::CompareCompute<
PRECISION(kInt64),
paddle::lite::kernels::host::_LessThanFunctor<int64_t>>;
REGISTER_LITE_KERNEL(less_than, kHost, kInt64, kAny, less_than_int64, def)
.BindInput("X",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kInt64), DATALAYOUT(kAny), -1)})
.BindInput("Y",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kInt64), DATALAYOUT(kAny), -1)})
.BindOutput("Out",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kBool), DATALAYOUT(kAny), -1)})
.Finalize();
using less_equal_float = paddle::lite::kernels::host::CompareCompute<
PRECISION(kFloat),
paddle::lite::kernels::host::_LessEqualFunctor<float>>;
REGISTER_LITE_KERNEL(less_equal, kHost, kFloat, kAny, less_equal_float, def)
.BindInput("X",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny), -1)})
.BindInput("Y",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny), -1)})
.BindOutput("Out",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kBool), DATALAYOUT(kAny), -1)})
.Finalize();
using greater_than_float = paddle::lite::kernels::host::CompareCompute<
PRECISION(kFloat),
paddle::lite::kernels::host::_GreaterThanFunctor<float>>;
REGISTER_LITE_KERNEL(greater_than, kHost, kFloat, kAny, greater_than_float, def)
.BindInput("X",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny), -1)})
.BindInput("Y",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny), -1)})
.BindOutput("Out",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kBool), DATALAYOUT(kAny), -1)})
.Finalize();
using greater_equal_float = paddle::lite::kernels::host::CompareCompute<
PRECISION(kFloat),
paddle::lite::kernels::host::_GreaterEqualFunctor<float>>;
REGISTER_LITE_KERNEL(
greater_equal, kHost, kFloat, kAny, greater_equal_float, def)
.BindInput("X",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny), -1)})
.BindInput("Y",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny), -1)})
.BindOutput("Out",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kBool), DATALAYOUT(kAny), -1)})
.Finalize();
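get_mid_dims above factors the broadcast into pre (dims of X before axis), n (all dims of Y), and post (dims of X after the Y block), so Y can be indexed by mid_id alone inside the triple loop of CompareCompute::Run. A small worked example under assumed shapes:

#include <cstdint>
#include <iostream>
#include <vector>

// Same decomposition as get_mid_dims, on plain dim vectors.
// Example: x_dims = {2, 3, 4, 5}, y_dims = {3, 4}, axis = 1
//   -> pre = 2, n = 3 * 4 = 12, post = 5,
// and element x[(outer * n + mid) * post + inner] is compared with y[mid].
void mid_dims(const std::vector<int64_t>& x_dims,
              const std::vector<int64_t>& y_dims,
              int axis, int* pre, int* n, int* post) {
  *pre = *n = *post = 1;
  for (int i = 0; i < axis; ++i) *pre *= x_dims[i];
  for (size_t i = 0; i < y_dims.size(); ++i) *n *= y_dims[i];
  for (size_t i = axis + y_dims.size(); i < x_dims.size(); ++i) *post *= x_dims[i];
}

int main() {
  int pre, n, post;
  mid_dims({2, 3, 4, 5}, {3, 4}, 1, &pre, &n, &post);
  std::cout << pre << " " << n << " " << post << "\n";  // prints: 2 12 5
  return 0;
}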
......@@ -13,43 +13,24 @@
// limitations under the License.
#pragma once
#include <stdint.h>
#include "lite/backends/arm/math/type_trans.h"
#include "lite/core/kernel.h"
#include "lite/operators/compare_op.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
namespace host {
template <template <typename T> class Functor>
class CompareCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
template <PrecisionType PType, typename CompareFunctor>
class CompareCompute
: public KernelLite<TARGET(kHost), PType, DATALAYOUT(kAny)> {
public:
void Run() override;
~CompareCompute() {}
virtual ~CompareCompute() = default;
};
template <template <typename T> class Functor>
class CompareCompute_int32
: public KernelLite<TARGET(kARM), PRECISION(kInt32)> {
public:
void Run() override;
~CompareCompute_int32() {}
};
template <template <typename T> class Functor>
class CompareCompute_int64
: public KernelLite<TARGET(kARM), PRECISION(kInt64)> {
public:
void Run() override;
~CompareCompute_int64() {}
};
} // namespace arm
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
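Kernels registered with REGISTER_LITE_KERNEL are linked into a binary on demand through a matching USE_LITE_KERNEL declaration (same op, target, precision, layout, and alias), as the reshape unit test below does for reshape/reshape2. For the new host compare kernels, a hypothetical consumer file would look like:

#include "lite/core/op_registry.h"
// Pull in two of the host compare kernels registered above;
// the tuple must match the corresponding REGISTER_LITE_KERNEL call.
USE_LITE_KERNEL(equal, kHost, kFloat, kAny, def);
USE_LITE_KERNEL(less_than, kHost, kInt64, kAny, def);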
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/host/reshape_compute.h"
#include <gtest/gtest.h>
#include <vector>
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace host {
TEST(reshape_host, init) {
ReshapeCompute reshape;
ASSERT_EQ(reshape.precision(), PRECISION(kAny));
ASSERT_EQ(reshape.target(), TARGET(kHost));
}
TEST(reshape_host, compute) {
ReshapeCompute reshape;
operators::ReshapeParam param;
Tensor input;
Tensor output;
input.Resize({1, 2, 4, 6});
auto* input_data = input.mutable_data<float>();
for (int i = 0; i < input.numel(); i++) {
input_data[i] = i;
}
Tensor shape_tensor;
shape_tensor.Resize({2});
auto* shape_tensor_data = shape_tensor.mutable_data<int>();
shape_tensor_data[0] = 6;
shape_tensor_data[1] = 8;
// set param and run
param.x = &input;
param.shape_tensor = &shape_tensor; // use shape_tensor
param.inplace = false;
param.output = &output;
reshape.SetParam(param);
reshape.Run();
// check output dims
CHECK_EQ(shape_tensor.numel(), output.numel());
for (int i = 0; i < output.dims().size(); i++) {
CHECK_EQ(output.dims()[i], shape_tensor_data[i]);
}
// check output data
auto* output_data = output.mutable_data<float>();
CHECK_NE(output_data, input_data);
for (int i = 0; i < output.numel(); i++) {
EXPECT_NEAR(output_data[i], input_data[i], 1e-6);
}
// use shape, set param and run
param.shape_tensor = nullptr;
param.shape_vct = {-1, 0, 3, 2, 1};
reshape.SetParam(param);
reshape.Run();
// check output dims
CHECK_EQ(shape_tensor.numel(), output.numel());
for (int i = 0; i < output.dims().size(); i++) {
CHECK_EQ(output.dims()[i], shape_tensor_data[i]);
}
// check output data
output_data = output.mutable_data<float>();
CHECK_NE(output_data, input_data);
for (int i = 0; i < output.numel(); i++) {
EXPECT_NEAR(output_data[i], input_data[i], 1e-6);
}
// check output data if inplace = true;
param.inplace = true;
reshape.SetParam(param);
reshape.Run();
output_data = output.mutable_data<float>();
CHECK_EQ(output_data, input_data);
}
TEST(reshape, retrive_op) {
auto reshape =
KernelRegistry::Global()
.Create<TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)>("reshape");
ASSERT_FALSE(reshape.empty());
ASSERT_TRUE(reshape.front());
}
TEST(reshape2, retrive_op) {
auto reshape2 =
KernelRegistry::Global()
.Create<TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)>("reshape2");
ASSERT_FALSE(reshape2.empty());
ASSERT_TRUE(reshape2.front());
}
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_KERNEL(reshape, kHost, kAny, kAny, def);
USE_LITE_KERNEL(reshape2, kHost, kAny, kAny, def);
......@@ -12,13 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/arm/shape_compute.h"
#include "lite/backends/arm/math/funcs.h"
#include "lite/kernels/host/shape_compute.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
namespace host {
void ShapeCompute::Run() {
auto& param = Param<operators::ShapeParam>();
......@@ -29,13 +28,17 @@ void ShapeCompute::Run() {
}
}
} // namespace arm
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(
shape, kARM, kFloat, kNCHW, paddle::lite::kernels::arm::ShapeCompute, def)
.BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
shape, kHost, kAny, kAny, paddle::lite::kernels::host::ShapeCompute, def)
.BindInput("Input",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny), -1)})
.BindOutput("Out",
{LiteType::GetTensorTy(
TARGET(kHost), PRECISION(kInt32), DATALAYOUT(kAny), -1)})
.Finalize();
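The Run body is elided in this hunk; its effect is to write the input's dims into Out as int32 values, which is why the kernel binds an any-precision input to an int32 output. A host-side sketch of that behaviour (standalone reference, not the kernel code):

#include <cstdint>
#include <vector>

// Reference behaviour of the shape op: a 1-D int32 tensor holding X's dims.
// E.g. X dims {2, 3, 4} -> out data [2, 3, 4], out dims {3}.
std::vector<int32_t> shape_of(const std::vector<int64_t>& x_dims) {
  return std::vector<int32_t>(x_dims.begin(), x_dims.end());
}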
......@@ -19,16 +19,17 @@
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
namespace host {
class ShapeCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
class ShapeCompute
: public KernelLite<TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)> {
public:
void Run() override;
virtual ~ShapeCompute() = default;
};
} // namespace arm
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
......@@ -38,6 +38,8 @@ lite_cc_library(subgraph_bridge_shuffle_channel_op_npu SRCS shuffle_channel_op.c
lite_cc_library(subgraph_bridge_pad2d_op_npu SRCS pad2d_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_reduce_mean_op_npu SRCS reduce_mean_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_unsqueeze_op_npu SRCS unsqueeze_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_gather_op_npu SRCS gather_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_lookup_table_op_npu SRCS lookup_table_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_argmax_op_npu SRCS argmax_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_instance_norm_op_npu SRCS instance_norm_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_dropout_op_npu SRCS dropout_op.cc DEPS ${npu_subgraph_bridge_deps})
......@@ -47,6 +49,7 @@ lite_cc_library(subgraph_bridge_fill_constant_op_npu SRCS fill_constant_op.cc DE
lite_cc_library(subgraph_bridge_fill_constant_batch_size_like_op_npu SRCS fill_constant_batch_size_like_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_increment_op_npu SRCS increment_op.cc DEPS ${npu_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_compare_op_npu SRCS compare_op.cc DEPS ${npu_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_shape_op_npu SRCS shape_op.cc DEPS ${npu_subgraph_bridge_deps})
set(npu_subgraph_bridges
......@@ -73,6 +76,8 @@ set(npu_subgraph_bridges
subgraph_bridge_pad2d_op_npu
subgraph_bridge_reduce_mean_op_npu
subgraph_bridge_unsqueeze_op_npu
subgraph_bridge_gather_op_npu
subgraph_bridge_lookup_table_op_npu
subgraph_bridge_argmax_op_npu
subgraph_bridge_instance_norm_op_npu
subgraph_bridge_dropout_op_npu
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/npu/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/utility.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace npu {
int GatherConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[NPU] Converting " + op_type + "...";
// Get input, output and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindTensor(x_name);
auto index_name = op_info->Input("Index").front();
auto index = scope->FindTensor(index_name);
auto index_dims = index->dims();
CHECK(index_dims.size() == 1 ||
(index_dims.size() == 2 && index_dims[1] == 1))
<< "index dims unmatch";
auto out_name = op_info->Output("Out").front();
// X node
std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) {
x_node = graph->Get(x_name);
} else {
x_node = graph->Add(x_name, *x);
}
// Index node
std::shared_ptr<Node> index_node = nullptr;
if (graph->Has(index_name)) {
index_node = graph->Get(index_name);
} else {
index_node = graph->Add(index_name, *index);
}
// Gather node
auto gather_node = graph->Add<ge::op::Gather>(out_name);
auto gather_op = gather_node->data<ge::op::Gather>();
gather_op->set_input_params(*x_node->data());
gather_op->set_input_indices(*index_node->data());
return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace npu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(gather,
kNPU,
paddle::lite::subgraph::npu::GatherConverter);
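For reference, ge::op::Gather here selects along axis 0 of X using the 1-D (or Nx1) Index checked above. A minimal host-side sketch of that row-gather semantics (illustrative only, not the NPU code path):

#include <cstdint>
#include <vector>

// Row gather: x is row-major with shape {num_rows, row_len};
// the result holds one copied row of x per entry of index.
std::vector<float> gather_rows(const std::vector<float>& x,
                               int64_t row_len,
                               const std::vector<int64_t>& index) {
  std::vector<float> out(index.size() * row_len);
  for (size_t i = 0; i < index.size(); ++i) {
    for (int64_t j = 0; j < row_len; ++j) {
      out[i * row_len + j] = x[index[i] * row_len + j];
    }
  }
  return out;
}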
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/npu/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/utility.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace npu {
int LookupTableConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[NPU] Converting " + op_type + "...";
// Get input, output and op attributes
auto w_name = op_info->Input("W").front();
auto w = scope->FindTensor(w_name);
auto index_name = op_info->Input("Ids").front();
auto index = scope->FindTensor(index_name);
auto out_name = op_info->Output("Out").front();
auto out = scope->FindTensor(out_name);
auto out_shape = out->dims().Vectorize();
// W node
std::shared_ptr<Node> w_node = nullptr;
if (graph->Has(w_name)) {
w_node = graph->Get(w_name);
} else {
w_node = graph->Add(w_name, *w);
}
// Index node
std::shared_ptr<Node> index_node = nullptr;
if (graph->Has(index_name)) {
index_node = graph->Get(index_name);
} else {
index_node = graph->Add(index_name, *index);
}
// reshape ids
auto reshaped_index_node =
graph->Add<ge::op::Reshape>(index_name + "/reshape");
auto reshaped_index_op = reshaped_index_node->data<ge::op::Reshape>();
reshaped_index_op->set_input_tensor(*index_node->data());
reshaped_index_op->set_attr_shape(ge::AttrValue::LIST_INT({index->numel()}));
reshaped_index_op->set_attr_axis(0);
index_node = reshaped_index_node;
// Gather node
auto gather_node = graph->Add<ge::op::Gather>(out_name);
auto gather_op = gather_node->data<ge::op::Gather>();
gather_op->set_input_params(*w_node->data());
gather_op->set_input_indices(*index_node->data());
// reshape out
auto reshaped_gather_node = graph->Add<ge::op::Reshape>(out_name);
auto reshaped_gather_op = reshaped_gather_node->data<ge::op::Reshape>();
reshaped_gather_op->set_input_tensor(*gather_node->data());
reshaped_gather_op->set_attr_shape(
ge::AttrValue::LIST_INT(out_shape.begin(), out_shape.end()));
reshaped_gather_op->set_attr_axis(0);
return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace npu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(lookup_table,
kNPU,
paddle::lite::subgraph::npu::LookupTableConverter);
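The two Reshape nodes above only do shape bookkeeping around the Gather: Ids are flattened to 1-D before gathering rows of W, and the gathered {num_ids, width} result is reshaped back to the op's output dims. A tiny sketch of that dimension flow, assuming the usual lookup_table convention that out_dims equals ids_dims with the trailing 1 replaced by the embedding width:

#include <cstdint>
#include <vector>

// E.g. Ids dims {2, 3, 1} and W dims {4, 8}:
//   reshape ids : {2, 3, 1} -> {6}          (index->numel())
//   gather      : rows of W  -> {6, 8}
//   reshape out : {6, 8}     -> {2, 3, 8}   (out->dims().Vectorize())
std::vector<int64_t> lookup_table_out_dims(std::vector<int64_t> ids_dims,
                                           int64_t embedding_width) {
  ids_dims.back() = embedding_width;
  return ids_dims;
}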
......@@ -45,6 +45,8 @@ USE_SUBGRAPH_BRIDGE(fusion_elementwise_div_activation, kNPU);
USE_SUBGRAPH_BRIDGE(fill_constant, kNPU)
USE_SUBGRAPH_BRIDGE(fill_constant_batch_size_like, kNPU)
// USE_SUBGRAPH_BRIDGE(gather, kNPU);
// USE_SUBGRAPH_BRIDGE(lookup_table, kNPU);
USE_SUBGRAPH_BRIDGE(increment, kNPU);
USE_SUBGRAPH_BRIDGE(instance_norm, kNPU);
USE_SUBGRAPH_BRIDGE(fc, kNPU);
......@@ -59,6 +61,7 @@ USE_SUBGRAPH_BRIDGE(reduce_mean, kNPU);
USE_SUBGRAPH_BRIDGE(reshape, kNPU);
USE_SUBGRAPH_BRIDGE(reshape2, kNPU);
USE_SUBGRAPH_BRIDGE(scale, kNPU);
// USE_SUBGRAPH_BRIDGE(shape, kNPU);
USE_SUBGRAPH_BRIDGE(shuffle_channel, kNPU);
USE_SUBGRAPH_BRIDGE(softmax, kNPU);
USE_SUBGRAPH_BRIDGE(split, kNPU);
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/npu/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/utility.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace npu {
int ShapeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[NPU] Converting " + op_type + "...";
// Get input, output and op attributes
auto x_name = op_info->Input("Input").front();
auto x = scope->FindTensor(x_name);
auto out_name = op_info->Output("Out").front();
// X node
std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) {
x_node = graph->Get(x_name);
} else {
x_node = graph->Add(x_name, *x);
}
// Shape node
auto shape_node = graph->Add<ge::op::Shape>(out_name);
auto shape_op = shape_node->data<ge::op::Shape>();
shape_op->set_input_x(*x_node->data());
return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace npu
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(shape,
kNPU,
paddle::lite::subgraph::npu::ShapeConverter);
......@@ -26,9 +26,8 @@ bool ShapeOpLite::CheckShape() const {
}
bool ShapeOpLite::InferShapeImpl() const {
std::vector<int64_t> shape_vec;
shape_vec.push_back(static_cast<int64_t>(param_.X->dims().size()));
param_.Out->Resize(shape_vec);
int64_t x_dims_size = param_.X->dims().size();
param_.Out->Resize({x_dims_size});
return true;
}
......
......@@ -216,7 +216,7 @@ TEST(Compare_OP_NPU, precision) {
}
#elif defined(LITE_WITH_ARM)
TEST(Compare_OP_ARM, precision) {
Place place{TARGET(kARM)};
Place place{TARGET(kHost)};
float abs_error = 1e-5;
for (auto op : std::vector<std::string>{"equal",
"not_equal",
......
......@@ -91,10 +91,12 @@ class GatherComputeTest : public arena::TestCase {
};
TEST(Gather, precision) {
LOG(INFO) << "test gather op";
float abs_error = 2e-5;
Place place;
#if defined(LITE_WITH_ARM)
#if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2; // use fp16 in npu
#elif defined(LITE_WITH_ARM)
place = TARGET(kARM);
#elif defined(LITE_WITH_XPU)
place = TARGET(kXPU);
......@@ -104,8 +106,7 @@ TEST(Gather, precision) {
for (auto x_dims :
std::vector<std::vector<int64_t>>{{5, 2, 3, 4}, {8, 3, 5}, {12, 3}}) {
for (auto index_dims :
std::vector<std::vector<int64_t>>{{3, 1}, {7, 1}, {10, 1}}) {
for (auto index_dims : std::vector<std::vector<int64_t>>{{3}, {7}, {10}}) {
std::unique_ptr<arena::TestCase> tester(
new GatherComputeTest(place, "def", DDim(x_dims), DDim(index_dims)));
arena::Arena arena(std::move(tester), place, abs_error);
......
......@@ -21,6 +21,7 @@
namespace paddle {
namespace lite {
template <typename T>
class LookupTableComputeTest : public arena::TestCase {
protected:
// common attributes for this op.
......@@ -64,7 +65,7 @@ class LookupTableComputeTest : public arena::TestCase {
out->Resize(out_dims);
out->set_lod(ids->lod());
auto ids_data = ids->data<int64_t>();
auto ids_data = ids->data<T>();
auto ids_size = ids_dims.production();
auto w_data = w->data<float>();
auto w_rows = w_dims[0];
......@@ -95,9 +96,8 @@ class LookupTableComputeTest : public arena::TestCase {
}
void PrepareData() override {
std::vector<int64_t> ids(ids_dims_.production());
fill_data_rand<int64_t>(
ids.data(), 0, w_dims_[0] - 1, ids_dims_.production());
std::vector<T> ids(ids_dims_.production());
fill_data_rand<T>(ids.data(), 0, w_dims_[0] - 1, ids_dims_.production());
std::vector<float> w(w_dims_.production());
fill_data_rand(w.data(), -1.f, 1.f, w_dims_.production());
......@@ -109,9 +109,12 @@ class LookupTableComputeTest : public arena::TestCase {
TEST(LookupTable, precision) {
LOG(INFO) << "test lookup_table op";
float abs_error = 2e-5;
float abs_error = 1e-5;
Place place;
#if defined(LITE_WITH_ARM)
#if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2;
#elif defined(LITE_WITH_ARM)
place = TARGET(kARM);
#elif defined(LITE_WITH_XPU)
place = TARGET(kXPU);
......@@ -119,18 +122,25 @@ TEST(LookupTable, precision) {
return;
#endif
#if defined(LITE_WITH_NPU)
using ID_T = int;
#else
using ID_T = int64_t;
#endif
for (auto ids_dims :
std::vector<std::vector<int64_t>>{{5, 2, 3, 1}, {2, 3, 1}, {3, 1}}) {
for (auto w_dims :
std::vector<std::vector<int64_t>>{{4, 2}, {6, 8}, {12, 15}}) {
#if defined(LITE_WITH_XPU)
#if defined(LITE_WITH_XPU) && defined(LITE_WITH_NPU)
for (auto padding_idx :
std::vector<int64_t>{-1}) { // Only -1 is supported by XPU
std::vector<int64_t>{-1}) { // Only -1 is supported by XPU or NPU
#else
for (auto padding_idx : std::vector<int64_t>{-1, 0, w_dims[0] - 1}) {
#endif
std::unique_ptr<arena::TestCase> tester(new LookupTableComputeTest(
place, "def", DDim(ids_dims), DDim(w_dims), padding_idx));
std::unique_ptr<arena::TestCase> tester(
new LookupTableComputeTest<ID_T>(
place, "def", DDim(ids_dims), DDim(w_dims), padding_idx));
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
......
......@@ -204,6 +204,8 @@ TEST(Reshape, precision) {
#if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2; // Using fp16 in NPU
#elif defined(LITE_WITH_ARM)
place = TARGET(kHost);
#elif defined(LITE_WITH_XPU)
place = TARGET(kXPU);
#else
......
......@@ -16,13 +16,14 @@
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/core/arena/framework.h"
#include "lite/tests/utils/fill_data.h"
namespace paddle {
namespace lite {
class ShapeComputeTester : public arena::TestCase {
protected:
// common attributes for this op.
std::string x_ = "Input";
std::string input_ = "Input";
std::string out_ = "Out";
DDim dims_;
......@@ -31,7 +32,7 @@ class ShapeComputeTester : public arena::TestCase {
: TestCase(place, alias), dims_(dims) {}
void RunBaseline(Scope* scope) override {
const auto* input = scope->FindTensor(x_);
const auto* input = scope->FindTensor(input_);
CHECK(input);
auto* out = scope->NewTensor(out_);
CHECK(out);
......@@ -45,42 +46,46 @@ class ShapeComputeTester : public arena::TestCase {
void PrepareOpDesc(cpp::OpDesc* op_desc) {
op_desc->SetType("shape");
op_desc->SetInput("Input", {x_});
op_desc->SetInput("Input", {input_});
op_desc->SetOutput("Out", {out_});
}
void PrepareData() override {
std::vector<float> in_data(dims_.production());
for (int i = 0; i < dims_.production(); ++i) {
in_data[i] = i;
}
SetCommonTensor(x_, dims_, in_data.data());
std::vector<float> din(dims_.production());
fill_data_rand(din.data(), -1.f, 1.f, dims_.production());
SetCommonTensor(input_, dims_, din.data());
}
};
void test_shape(Place place) {
for (int N : {1, 2, 3, 4}) {
for (int C : {1, 2, 3, 4}) {
for (int H : {1, 2, 3, 4}) {
for (int W : {1, 2, 3, 4}) {
std::unique_ptr<arena::TestCase> tester(
new ShapeComputeTester(place, "def", DDim({N, C, H, W})));
arena::Arena arena(std::move(tester), place, 2e-5);
arena.TestPrecision();
}
}
}
}
void TestShapeHelper(Place place,
float abs_error,
std::vector<int64_t> x_dims) {
std::unique_ptr<arena::TestCase> tester(
new ShapeComputeTester(place, "def", DDim(x_dims)));
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
void test_shape(Place place, float abs_error) {
TestShapeHelper(place, abs_error, {2, 3, 4, 5});
TestShapeHelper(place, abs_error, {3, 4, 5});
TestShapeHelper(place, abs_error, {4, 5});
TestShapeHelper(place, abs_error, {5});
}
TEST(shape, precision) {
#ifdef LITE_WITH_X86
Place place(TARGET(kX86));
#endif
#ifdef LITE_WITH_ARM
Place place(TARGET(kARM));
test_shape(place);
Place place;
float abs_error = 1e-5;
#if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2;
#elif defined(LITE_WITH_ARM)
place = TARGET(kHost);
#else
return;
#endif
test_shape(place, abs_error);
}
} // namespace lite
......