Unverified commit b445941f, authored by zhupengyang, committed by GitHub

move logical_compute to host and add ut (#3424)

Parent 92c6f80b
......
@@ -111,18 +111,23 @@ class KernelRegistry final {
KernelRegistryForTarget<TARGET(kCUDA),
PRECISION(kFloat),
DATALAYOUT(kNHWC)> *, //
+KernelRegistryForTarget<TARGET(kCUDA),
+PRECISION(kAny),
+DATALAYOUT(kAny)> *, //
KernelRegistryForTarget<TARGET(kCUDA),
PRECISION(kInt8),
DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kCUDA),
PRECISION(kInt8),
DATALAYOUT(kNHWC)> *, //
KernelRegistryForTarget<TARGET(kX86),
PRECISION(kFloat),
DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kX86),
PRECISION(kInt8),
DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kHost),
PRECISION(kFloat),
DATALAYOUT(kNCHW)> *, //
......
@@ -141,9 +146,7 @@ class KernelRegistry final {
KernelRegistryForTarget<TARGET(kHost),
PRECISION(kInt64),
DATALAYOUT(kNCHW)> *, //
-KernelRegistryForTarget<TARGET(kCUDA),
-PRECISION(kAny),
-DATALAYOUT(kAny)> *, //
KernelRegistryForTarget<TARGET(kARM),
PRECISION(kAny),
DATALAYOUT(kAny)> *, //
......
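The variant list above is what each REGISTER_LITE_KERNEL invocation is resolved against; a hedged sketch of the relationship (the kHost/kAny/kAny entry used by the kernels moved in this diff lies outside the visible hunk):

// A registration of the form
//   REGISTER_LITE_KERNEL(logical_and, kHost, kAny, kAny, KernelClass, def)
// is matched at compile time to the variant
//   KernelRegistryForTarget<TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)>
// which is why the list enumerates every (target, precision, layout)
// combination that kernels may register with.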
......
@@ -88,7 +88,6 @@ add_kernel(gru_compute_arm ARM extra SRCS gru_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(beam_search_decode_compute_arm ARM extra SRCS beam_search_decode_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(lookup_table_compute_arm ARM extra SRCS lookup_table_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(lookup_table_dequant_compute_arm ARM extra SRCS lookup_table_dequant_compute.cc DEPS ${lite_kernel_deps} math_arm)
-add_kernel(logical_compute_arm ARM extra SRCS logical_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_softmax_compute_arm ARM extra SRCS sequence_softmax_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(while_compute_arm ARM extra SRCS while_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(topk_compute_arm ARM extra SRCS topk_compute.cc DEPS ${lite_kernel_deps} math_arm)
......
......
@@ -8,4 +8,5 @@ add_kernel(shape_compute_host Host extra SRCS shape_compute.cc DEPS ${lite_kerne
add_kernel(is_empty_compute_host Host extra SRCS is_empty_compute.cc DEPS ${lite_kernel_deps})
add_kernel(crf_decoding_compute_host Host extra SRCS crf_decoding_compute.cc DEPS ${lite_kernel_deps})
add_kernel(compare_compute_host Host extra SRCS compare_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(logical_compute_host Host extra SRCS logical_compute.cc DEPS ${lite_kernel_deps})
add_kernel(ctc_align_compute_host Host extra SRCS ctc_align_compute.cc DEPS ${lite_kernel_deps})
......
@@ -12,44 +12,34 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/arm/logical_compute.h"
#include <vector>
#include "lite/api/paddle_place.h"
#include "lite/backends/arm/math/funcs.h"
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
#include "lite/kernels/host/logical_compute.h"
namespace paddle {
namespace lite {
namespace kernels {
-namespace arm {
+namespace host {
-#define LOGICAL_FUNCTOR(name, op) \
-template <typename T> \
-struct _##name##Functor { \
-inline bool operator()(const T& a, const T& b) const { return a op b; } \
+#define LOGICAL_FUNCTOR(name, op) \
+struct _##name##Functor { \
+inline bool operator()(const bool& a, const bool& b) const { \
+return a op b; \
+} \
};
LOGICAL_FUNCTOR(LogicalAnd, &&);
LOGICAL_FUNCTOR(LogicalOr, ||);
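// For reference, each LOGICAL_FUNCTOR invocation above expands into a
// bool-only functor; e.g. LOGICAL_FUNCTOR(LogicalAnd, &&) yields:
//   struct _LogicalAndFunctor {
//     inline bool operator()(const bool& a, const bool& b) const {
//       return a && b;
//     }
//   };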
-template <typename T>
struct _LogicalXorFunctor {
-inline bool operator()(const T& a, const T& b) const {
+inline bool operator()(const bool& a, const bool& b) const {
return (a || b) && !(a && b);
}
};
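// (a || b) && !(a && b) is boolean XOR, i.e. true iff exactly one input is
// true: (0,0)->0, (0,1)->1, (1,0)->1, (1,1)->0.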
-template <typename T>
struct _LogicalNotFunctor {
-inline bool operator()(const T& a) const { return !a; }
+inline bool operator()(const bool& a) const { return !a; }
};
-// template<typename Functor>
-template <template <typename T> class Functor>
-void BinaryLogicalCompute<Functor>::PrepareForRun() {}
-template <template <typename T> class Functor>
+template <class Functor>
-// template<typename Functor>
void BinaryLogicalCompute<Functor>::Run() {
auto& param = this->Param<operators::LogicalParam>();
......
@@ -57,72 +47,103 @@ void BinaryLogicalCompute<Functor>::Run() {
bool* z = param.Out->template mutable_data<bool>();
const bool* x = param.X->template data<bool>();
const bool* y = param.Y->template data<bool>();
-using LogicalFunctor = Functor<bool>;
for (int i = 0; i < count; ++i) {
-z[i] = LogicalFunctor()(x[i], y[i]);
+z[i] = Functor()(x[i], y[i]);
}
}
-template <template <typename> class Functor>
-void UnaryLogicalCompute<Functor>::PrepareForRun() {}
-template <template <typename> class Functor>
+template <class Functor>
void UnaryLogicalCompute<Functor>::Run() {
auto& param = this->Param<operators::LogicalParam>();
const size_t count = param.X->numel();
bool* z = param.Out->template mutable_data<bool>();
const auto x = param.X->template data<bool>();
-using LogicalFunctor = Functor<bool>;
for (int i = 0; i < count; ++i) {
-z[i] = LogicalFunctor()(x[i]);
+z[i] = Functor()(x[i]);
}
}
-} // namespace arm
+} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(logical_xor,
-kARM,
-kFloat,
-kNCHW,
-paddle::lite::kernels::arm::BinaryLogicalCompute<
-paddle::lite::kernels::arm::_LogicalXorFunctor>,
+kHost,
+kAny,
+kAny,
+paddle::lite::kernels::host::BinaryLogicalCompute<
+paddle::lite::kernels::host::_LogicalXorFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.BindInput("Y",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.Finalize();
REGISTER_LITE_KERNEL(logical_and,
-kARM,
-kFloat,
-kNCHW,
-paddle::lite::kernels::arm::BinaryLogicalCompute<
-paddle::lite::kernels::arm::_LogicalAndFunctor>,
+kHost,
+kAny,
+kAny,
+paddle::lite::kernels::host::BinaryLogicalCompute<
+paddle::lite::kernels::host::_LogicalAndFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.BindInput("Y",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.Finalize();
REGISTER_LITE_KERNEL(logical_or,
-kARM,
-kFloat,
-kNCHW,
-paddle::lite::kernels::arm::BinaryLogicalCompute<
-paddle::lite::kernels::arm::_LogicalOrFunctor>,
+kHost,
+kAny,
+kAny,
+paddle::lite::kernels::host::BinaryLogicalCompute<
+paddle::lite::kernels::host::_LogicalOrFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.BindInput("Y",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.Finalize();
REGISTER_LITE_KERNEL(logical_not,
-kARM,
-kFloat,
-kNCHW,
-paddle::lite::kernels::arm::UnaryLogicalCompute<
-paddle::lite::kernels::arm::_LogicalNotFunctor>,
+kHost,
+kAny,
+kAny,
+paddle::lite::kernels::host::UnaryLogicalCompute<
+paddle::lite::kernels::host::_LogicalNotFunctor>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kBool))})
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.Finalize();
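A minimal sketch of driving the relocated kernel directly, mirroring how Paddle-Lite kernel unit tests commonly exercise kernels; the tensors x, y, and out are assumed to be lite::Tensor objects already resized and filled with bool data:

// Sketch only: assumes x, y, out are prepared lite::Tensor objects.
paddle::lite::kernels::host::BinaryLogicalCompute<
    paddle::lite::kernels::host::_LogicalAndFunctor>
    logical_and;
paddle::lite::operators::LogicalParam param;
param.X = &x;      // bool input
param.Y = &y;      // bool input, same shape as x
param.Out = &out;  // receives x[i] && y[i] element-wise
logical_and.SetParam(param);
logical_and.Run();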
......
@@ -13,41 +13,33 @@
// limitations under the License.
#pragma once
#include <stdint.h>
#include "lite/backends/arm/math/type_trans.h"
#include "lite/core/kernel.h"
#include "lite/operators/logical_op.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
-namespace arm {
-// template <typename Functor>
-template <template <typename> class Functor>
+namespace host {
+template <class Functor>
class BinaryLogicalCompute
-: public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
+: public KernelLite<TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)> {
public:
using param_t = operators::LogicalParam;
-void PrepareForRun() override;
void Run() override;
~BinaryLogicalCompute() {}
};
-template <template <typename> class Functor>
-class UnaryLogicalCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
+template <class Functor>
+class UnaryLogicalCompute
+: public KernelLite<TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)> {
public:
using param_t = operators::LogicalParam;
-void PrepareForRun() override;
void Run() override;
~UnaryLogicalCompute() {}
};
-} // namespace arm
+} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
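// These class templates are instantiated with the concrete functors at
// registration time in logical_compute.cc:
//   BinaryLogicalCompute<_LogicalAndFunctor>  -> logical_and
//   BinaryLogicalCompute<_LogicalOrFunctor>   -> logical_or
//   BinaryLogicalCompute<_LogicalXorFunctor>  -> logical_xor
//   UnaryLogicalCompute<_LogicalNotFunctor>   -> logical_not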
......
@@ -20,7 +20,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM AND NOT LIT
#lite_cc_test(test_kernel_sequence_softmax_compute SRCS sequence_softmax_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_im2sequence_compute SRCS im2sequence_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_compare_compute SRCS compare_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-#lite_cc_test(test_kernel_logical_xor_compute SRCS logical_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+lite_cc_test(test_kernel_logical_compute SRCS logical_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_topk_compute SRCS topk_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_increment_compute SRCS increment_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_write_to_array_compute SRCS write_to_array_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
......
......
@@ -20,86 +20,118 @@
namespace paddle {
namespace lite {
-bool _logical_xor_func(const bool& a, const bool& b) {
-return (a || b) && !(a && b);
-}
-bool _logical_and_func(const bool& a, const bool& b) { return (a && b); }
-template <bool (*T)(const bool&, const bool&)>
-class LogicalXorTester : public arena::TestCase {
+struct _logical_and_func {
+inline bool operator()(const bool& a, const bool& b) const { return a && b; }
+};
+struct _logical_or_func {
+inline bool operator()(const bool& a, const bool& b) const { return a || b; }
+};
+struct _logical_xor_func {
+inline bool operator()(const bool& a, const bool& b) const {
+return (a || b) && !(a && b);
+}
+};
+struct _logical_not_func {
+inline bool operator()(const bool& a, const bool& b) const { return !a; }
+};
+template <class Functor>
+class LogicalTester : public arena::TestCase {
protected:
-std::string input_x_ = "x";
-std::string input_y_ = "y";
-std::string output_ = "out";
-DDim dims_{{3, 5, 4, 4}};
+std::string op_type_ = "logical_xor";
+std::string x_ = "x";
+std::string y_ = "y";
+std::string out_ = "out";
+DDim dims_{{2, 3, 4, 5}};
public:
-LogicalXorTester(const Place& place, const std::string& alias, DDim dims)
-: TestCase(place, alias), dims_(dims) {}
+LogicalTester(const Place& place,
+const std::string& alias,
+const std::string& op_type)
+: TestCase(place, alias), op_type_(op_type) {}
void RunBaseline(Scope* scope) override {
-auto* out = scope->NewTensor(output_);
-CHECK(out);
+auto* x = scope->FindTensor(x_);
+const bool* x_data = x->data<bool>();
+const Tensor* y = nullptr;
+const bool* y_data = nullptr;
+if (op_type_ != "logical_not") {
+y = scope->FindTensor(y_);
+y_data = y->data<bool>();
+}
+auto* out = scope->NewTensor(out_);
out->Resize(dims_);
bool* out_data = out->mutable_data<bool>();
-auto* x = scope->FindTensor(input_x_);
-const bool* x_data = x->data<bool>();
-auto* y = scope->FindTensor(input_y_);
-const bool* y_data = y->data<bool>();
for (int i = 0; i < dims_.production(); i++) {
-// out_data[i] = (x_data[i] || y_data[i]) && !((x_data[i] && y_data[i]));
-out_data[i] = T(x_data[i], y_data[i]);
+bool y_tmp = (y_data == nullptr) ? true : y_data[i];
+out_data[i] = Functor()(x_data[i], y_tmp);
}
}
void PrepareOpDesc(cpp::OpDesc* op_desc) {
op_desc->SetType("logical_xor");
op_desc->SetInput("X", {input_x_});
op_desc->SetInput("Y", {input_y_});
op_desc->SetOutput("Out", {output_});
op_desc->SetType(op_type_);
op_desc->SetInput("X", {x_});
if (op_type_ != "logical_not") {
op_desc->SetInput("Y", {y_});
}
op_desc->SetOutput("Out", {out_});
}
void PrepareData() override {
-// std::vector<bool> data(dims_.production());
-// std::vector<char> datay(dims_.production());
-bool* data;
-bool* datay;
-data = reinterpret_cast<bool*>(malloc(dims_.production() * sizeof(bool)));
-datay = reinterpret_cast<bool*>(malloc(dims_.production() * sizeof(bool)));
-LOG(INFO) << "dims_.production()"
-<< ":::" << dims_.production();
-for (int i = 0; i < dims_.production(); i++) {
-data[i] = 1;
-datay[i] = 1;
+bool* dx = new bool[dims_.production()];
+for (int64_t i = 0; i < dims_.production(); i++) {
+dx[i] = (i % 3 == 0);
}
+SetCommonTensor(x_, dims_, dx);
+delete[] dx;
-SetCommonTensor(input_x_, dims_, data);
-SetCommonTensor(input_y_, dims_, datay);
if (op_type_ != "logical_not") {
bool* dy = new bool[dims_.production()];
for (int64_t i = 0; i < dims_.production(); i++) {
dy[i] = (i % 2 == 0);
}
SetCommonTensor(y_, dims_, dy);
delete dy;
}
}
};
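// The input patterns above exercise all four (x, y) combinations; for the
// first few elements:
//   i : 0 1 2 3 4 5
//   x : 1 0 0 1 0 0   (i % 3 == 0)
//   y : 1 0 1 0 1 0   (i % 2 == 0)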
-void test_logical(Place place) {
-DDimLite dims{{3, 5, 4, 4}};
-std::unique_ptr<arena::TestCase> logical_xor_tester(
-new LogicalXorTester<_logical_xor_func>(place, "def", dims));
-arena::Arena arena_xor(std::move(logical_xor_tester), place, 1);
+void TestLogical(Place place, float abs_error) {
+std::unique_ptr<arena::TestCase> logical_and_tester(
+new LogicalTester<_logical_and_func>(place, "def", "logical_and"));
+arena::Arena arena_and(std::move(logical_and_tester), place, abs_error);
+arena_and.TestPrecision();
-arena_xor.TestPrecision();
+std::unique_ptr<arena::TestCase> logical_or_tester(
+new LogicalTester<_logical_or_func>(place, "def", "logical_or"));
+arena::Arena arena_or(std::move(logical_or_tester), place, abs_error);
+arena_or.TestPrecision();
-std::unique_ptr<arena::TestCase> logical_and_tester(
-new LogicalXorTester<_logical_and_func>(place, "def", dims));
-arena::Arena arena_and(std::move(logical_and_tester), place, 1);
+std::unique_ptr<arena::TestCase> logical_xor_tester(
+new LogicalTester<_logical_xor_func>(place, "def", "logical_xor"));
+arena::Arena arena_xor(std::move(logical_xor_tester), place, abs_error);
+arena_xor.TestPrecision();
-arena_and.TestPrecision();
+std::unique_ptr<arena::TestCase> logical_not_tester(
+new LogicalTester<_logical_not_func>(place, "def", "logical_not"));
+arena::Arena arena_not(std::move(logical_not_tester), place, abs_error);
+arena_not.TestPrecision();
}
TEST(Logical, precision) {
-// #ifdef LITE_WITH_X86
-// // Place place(TARGET(kX86));
-// // #endif
-#ifdef LITE_WITH_ARM
-Place place(TARGET(kARM));
-test_logical(place);
+Place place;
+float abs_error = 1e-5;
+#if defined(LITE_WITH_ARM)
+place = TARGET(kHost);
+#else
+return;
#endif
+TestLogical(place, abs_error);
}
} // namespace lite
......