未验证 提交 08afd3aa 编写于 作者: Z zhupengyang 提交者: GitHub

[NPU] enhance unittest for shuffle_channel, unsqueeze, pool (#2730)

* [NPU] enhance unittest for shuffle_channel, unsqueeze, pool

test=develop
上级 8e7906d0
......@@ -51,3 +51,5 @@ USE_SUBGRAPH_BRIDGE(sqrt, kNPU);
USE_SUBGRAPH_BRIDGE(square, kNPU);
USE_SUBGRAPH_BRIDGE(transpose, kNPU);
USE_SUBGRAPH_BRIDGE(transpose2, kNPU);
USE_SUBGRAPH_BRIDGE(unsqueeze, kNPU);
USE_SUBGRAPH_BRIDGE(unsqueeze2, kNPU);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/pool_op.h"
#include <gtest/gtest.h>
#include <random>
#include "lite/core/op_registry.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/test_helper.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace npu {
namespace bridges {
// Reference (host-side) pool2d used to validate the NPU bridge.
//
// Reads the "X" tensor and the pooling attributes (ksize, strides, paddings,
// exclusive, pooling_type, global_pooling) from `op`, and overwrites the op's
// "Out" tensor with the result computed here. The output shape is taken from
// the already-resized "Out" tensor; it is not recomputed.
//
// The indexing below reads dims[0..3] of the input and dims[2..3] of the
// output, and uses paddings[0] for the height padding and paddings[2] for the
// width padding, so `paddings` must hold at least three entries.
void pool_ref(const std::shared_ptr<operators::PoolOpLite> op) {
  Scope* scope = op->scope();
  const OpInfo* op_info = op->op_info();
  auto input =
      scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
  auto output =
      scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
  auto& x_shape = input->dims();
  auto& y_shape = output->dims();
  const float* in_data = input->data<const float>();
  float* out_data = output->mutable_data<float>();

  // Pooling attributes.
  std::vector<int> ksize = op_info->GetAttr<std::vector<int>>("ksize");
  std::vector<int> strides = op_info->GetAttr<std::vector<int>>("strides");
  std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
  const bool exclusive = op_info->GetAttr<bool>("exclusive");
  const std::string pooling_type =
      op_info->GetAttr<std::string>("pooling_type");
  const bool global_pooling = op_info->GetAttr<bool>("global_pooling");
  const bool is_max = (pooling_type == "max");
  const bool is_avg = (pooling_type == "avg");

  // Input geometry.
  const int batch = x_shape[0];
  const int channels = x_shape[1];
  const int in_h = x_shape[2];
  const int in_w = x_shape[3];
  const int in_image = channels * in_h * in_w;
  const int in_plane = in_h * in_w;

  // Output geometry.
  const int out_h = y_shape[2];
  const int out_w = y_shape[3];
  const int out_image = channels * out_h * out_w;
  const int out_plane = out_h * out_w;

  // Window geometry.
  const int window_h = ksize[0];
  const int window_w = ksize[1];
  const int stride_h = strides[0];
  const int stride_w = strides[1];
  const int pad_h = paddings[0];
  const int pad_w = paddings[2];

  if (global_pooling) {
    // One value per (n, c): reduce the whole spatial plane.
    for (int n = 0; n < batch; ++n) {
      for (int c = 0; c < channels; ++c) {
        const float* plane = in_data + n * in_image + c * in_plane;
        float acc = plane[0];
        if (is_max) {
          for (int i = 1; i < in_plane; ++i) {
            if (plane[i] > acc) acc = plane[i];
          }
        } else if (is_avg) {
          for (int i = 1; i < in_plane; ++i) {
            acc += plane[i];
          }
          acc /= in_plane;
        }
        out_data[n * out_image + c] = acc;
      }
    }
    return;
  }

  // Windowed pooling: each window is clamped to the input extent.
  for (int n = 0; n < batch; ++n) {
    for (int c = 0; c < channels; ++c) {
      const float* plane = in_data + n * in_image + c * in_plane;
      float* result_plane = out_data + n * out_image + c * out_plane;
      for (int h = 0; h < out_h; ++h) {
        int h_begin = h * stride_h - pad_h;
        if (h_begin < 0) h_begin = 0;
        int h_end = h * stride_h + window_h - pad_h;
        if (h_end > in_h) h_end = in_h;
        for (int w = 0; w < out_w; ++w) {
          int w_begin = w * stride_w - pad_w;
          if (w_begin < 0) w_begin = 0;
          int w_end = w * stride_w + window_w - pad_w;
          if (w_end > in_w) w_end = in_w;

          const int pooling_size = (w_end - w_begin) * (h_end - h_begin);
          // An empty window leaves the output element untouched.
          if (pooling_size == 0) continue;

          float acc = 0.f;
          bool seeded = false;  // false until the first window element is read
          for (int kh = h_begin; kh < h_end; ++kh) {
            for (int kw = w_begin; kw < w_end; ++kw) {
              const float value = plane[kh * in_w + kw];
              if (!seeded) {
                acc = value;
                seeded = true;
              } else if (is_max) {
                if (!(acc >= value)) acc = value;
              } else if (is_avg) {
                acc += value;
              }
            }
          }
          if (is_avg) {
            // exclusive: divide by the clamped window size; otherwise by the
            // nominal kernel size.
            acc /= exclusive ? pooling_size : window_h * window_w;
          }
          result_plane[h * out_w + w] = acc;
        }
      }
    }
  }
}
// Builds a pool2d op over a {bs, ic, ih, iw} float input, runs it through the
// NPU bridge, then runs pool_ref on the same op and compares both results
// element-wise with an absolute tolerance of 1e-2.
//
// Parameters:
//   bs, ic, ih, iw  - input tensor shape.
//   pooling_type    - value of the "pooling_type" attribute ("max" / "avg").
//   ceil_mode       - value of the "ceil_mode" attribute.
//   global_pooling  - value of the "global_pooling" attribute.
//   exclusive       - value of the "exclusive" attribute.
//   ksize           - kernel size, used for both height and width.
//   stride          - stride, used for both height and width.
//   padding         - padding, replicated to all four "paddings" entries.
void test_pool(int bs,
               int ic,
               int ih,
               int iw,
               std::string pooling_type,
               bool ceil_mode,
               bool global_pooling,
               bool exclusive,
               int ksize,
               int stride,
               int padding) {
  // prepare input&output variables
  Scope scope;
  std::string x_var_name = "x";
  std::string out_var_name = "out";
  std::string out_ref_var_name = "out_ref";
  auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
  auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
  auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
  x->Resize({bs, ic, ih, iw});

  // initialize input&output data
  FillTensor<float>(x);

  // initialize op desc
  cpp::OpDesc opdesc;
  opdesc.SetType("pool2d");
  opdesc.SetInput("X", {x_var_name});
  opdesc.SetOutput("Out", {out_var_name});
  opdesc.SetAttr("pooling_type", pooling_type);
  opdesc.SetAttr("ksize", std::vector<int>({ksize, ksize}));
  opdesc.SetAttr("global_pooling", global_pooling);
  opdesc.SetAttr("exclusive", exclusive);
  // Previously `ceil_mode` was accepted but never forwarded, so the caller's
  // {true, false} sweep exercised the same configuration twice.
  opdesc.SetAttr("ceil_mode", ceil_mode);
  opdesc.SetAttr("strides", std::vector<int>({stride, stride}));
  opdesc.SetAttr("paddings",
                 std::vector<int>({padding, padding, padding, padding}));

  // create and convert op to NPU model, then run it on NPU
  auto op = CreateOp<operators::PoolOpLite>(opdesc, &scope);
  LauchOp(op, {x_var_name}, {out_var_name});
  // Keep a copy of the NPU result: pool_ref below overwrites `out`, so after
  // this point `out_ref` holds the NPU output and `out` holds the reference.
  out_ref->CopyDataFrom(*out);

  // execute reference implementation and save to output tensor
  pool_ref(op);

  // compare results
  auto* out_data = out->mutable_data<float>();
  auto* out_ref_data = out_ref->mutable_data<float>();
  for (int i = 0; i < out->dims().production(); i++) {
    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-2);
  }
}
// Sweeps pool2d configurations through test_pool.
// Part 1: windowed pooling over kernel/stride/padding/shape combinations
//         (global pooling and non-exclusive averaging are not swept).
// Part 2: global pooling on a fixed 6x6x6x6 input.
TEST(NPUBridges, pool) {
  // Part 1: windowed pooling.
  for (const char* pool_kind : {"max", "avg"}) {
    for (bool use_ceil : {true, false}) {
      for (bool is_global : {/*true, */ false}) {
        for (bool is_exclusive : {true /*, false*/}) {
          for (int kernel : {2, 3}) {
            for (int step : {1, 2}) {
              for (int pad : {0, 1}) {
                for (int batch : {1, 3}) {
                  for (int channels : {1, 3}) {
                    for (int height : {3, 7}) {
                      for (int width : {3, 7}) {
                        test_pool(batch,
                                  channels,
                                  height,
                                  width,
                                  pool_kind,
                                  use_ceil,
                                  is_global,
                                  is_exclusive,
                                  kernel,
                                  step,
                                  pad);
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }

  // Part 2: global pooling.
  for (const char* pool_kind : {"max", "avg"}) {
    for (bool use_ceil : {true, false}) {
      test_pool(/*bs=*/6,
                /*ic=*/6,
                /*ih=*/6,
                /*iw=*/6,
                pool_kind,
                use_ceil,
                /*global_pooling=*/true,
                /*exclusive=*/true,
                /*ksize=*/2,
                /*stride=*/1,
                /*padding=*/0);
    }
  }
}
} // namespace bridges
} // namespace npu
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_OP(pool2d);
USE_NPU_BRIDGE(pool2d);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/shuffle_channel_op.h"
#include <gtest/gtest.h>
#include "lite/core/op_registry.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/test_helper.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace npu {
namespace bridges {
// Reference (host-side) shuffle_channel used to validate the NPU bridge.
//
// Reads the "X" tensor and the "group" attribute from `op` and overwrites the
// op's "Out" tensor. Within every batch item, output channel `g * group + i`
// receives input channel `i * (C / group) + g`, where C is dims[1] of the
// input; each channel is copied as one contiguous run of floats.
void shuffle_channel_ref(
    const std::shared_ptr<operators::ShuffleChannelOpLite> op) {
  Scope* scope = op->scope();
  const OpInfo* op_info = op->op_info();
  auto input =
      scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
  auto output =
      scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
  auto src_base = input->mutable_data<float>();
  auto dst_base = output->mutable_data<float>();
  int group = op_info->GetAttr<int>("group");
  auto shape = input->dims();

  int batch_stride = shape.production() / shape[0];  // elements per batch item
  int channel_stride = batch_stride / shape[1];      // elements per channel

  for (int n = 0; n < shape[0]; n++) {
    int channels_per_group = shape[1] / group;
    auto src_batch = src_base + n * batch_stride;
    auto dst_batch = dst_base + n * batch_stride;
    int dst_channel = 0;  // output channels are filled sequentially
    for (int g = 0; g < channels_per_group; g++) {
      for (int i = 0; i < group; i++) {
        int src_channel = i * channels_per_group + g;
        std::memcpy(dst_batch + dst_channel * channel_stride,
                    src_batch + src_channel * channel_stride,
                    channel_stride * sizeof(float));
        ++dst_channel;
      }
    }
  }
}
void test_shuffle_channel(int bs, int ic, int ih, int iw, int group) {
// prepare input&output variables
Scope scope;
std::string x_var_name = "x";
std::string out_var_name = "out";
std::string out_ref_var_name = "out_ref";
auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
x->Resize({bs, ic, ih, iw});
// initialize input&output data
FillTensor<float>(x);
// initialize op desc
cpp::OpDesc opdesc;
opdesc.SetType("shuffle_channel");
opdesc.SetInput("X", {x_var_name});
opdesc.SetOutput("Out", {out_var_name});
opdesc.SetAttr("group", group);
// create and convert op to NPU model, then run it on NPU
auto op = CreateOp<operators::ShuffleChannelOpLite>(opdesc, &scope);
LauchOp(op, {x_var_name}, {out_var_name});
out_ref->CopyDataFrom(*out);
// execute reference implementation and save to output tensor
shuffle_channel_ref(op);
// compare results
auto* out_data = out->mutable_data<float>();
auto* out_ref_data = out_ref->mutable_data<float>();
for (int i = 0; i < out->dims().production(); i++) {
EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-2);
}
}
// Sweeps shuffle_channel over batch size, channel count, spatial size and
// group count. Combinations where the channel count is not divisible by the
// group count are skipped.
//
// The test was previously registered as "softmax" (a copy-paste leftover),
// which mislabelled it in test reports and --gtest_filter selections.
TEST(NPUBridges, shuffle_channel) {
  for (auto bs : {1, 4}) {
    for (auto ic : {1, 24, 35}) {
      for (auto ih : {1, 4}) {
        for (auto iw : {1, 4}) {
          for (auto group : {1, 3, 7, 24, 35}) {
            if (ic % group != 0) continue;
            test_shuffle_channel(bs, ic, ih, iw, group);
          }
        }
      }
    }
  }
}
} // namespace bridges
} // namespace npu
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_OP(shuffle_channel);
USE_NPU_BRIDGE(shuffle_channel);
......@@ -32,13 +32,12 @@ int UnsqueezeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto x_name = op_info->Input("X").front();
auto x_type = kernel->GetInputDeclType("X");
CHECK(x_type->precision() == PRECISION(kFloat));
CHECK(x_type->layout() == DATALAYOUT(kNCHW));
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
auto out_name = op_info->Output("Out").front();
auto out_type = kernel->GetOutputDeclType("Out");
CHECK(out_type->precision() == PRECISION(kFloat));
CHECK(out_type->layout() == DATALAYOUT(kNCHW));
auto out_shape = scope->FindTensor(out_name)->dims().Vectorize();
CHECK(op_info->HasAttr("axes"))
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/unsqueeze_op.h"
#include <gtest/gtest.h>
#include <cmath>
#include "lite/core/op_registry.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/test_helper.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace npu {
namespace bridges {
// Computes the shape produced by inserting a size-1 axis at each position
// listed in `unsqz_dims` into `in_dims`.
//
// Axes are applied in order. A negative axis is resolved against the rank
// reached so far (axis + rank + 1). The function aborts via CHECK when the
// resulting rank exceeds 6 or an axis falls outside [0, current rank].
static DDim GetOutputShape(const std::vector<int>& unsqz_dims,
                           const DDim& in_dims) {
  int output_size = in_dims.size() + static_cast<int>(unsqz_dims.size());
  int cur_output_size = in_dims.size();
  // 0 marks a slot still to be filled from in_dims; 1 marks an inserted axis.
  std::vector<int64_t> shape(output_size, 0);

  // Validate Check: rank range.
  CHECK_LE(output_size, 6) << "The output tensor's rank should be less than 6.";

  for (size_t k = 0; k < unsqz_dims.size(); ++k) {
    const int axis = unsqz_dims[k];
    int cur = axis;
    if (axis < 0) {
      cur = axis + cur_output_size + 1;
    }
    // Validate Check: the axis bound
    CHECK((cur >= 0) && (cur <= cur_output_size))
        << "The unsqueeze dims must be within range of current rank.";

    // Shift every previously inserted axis at or after `cur` one slot to the
    // right, walking from the back so no marker is overwritten.
    int i = cur_output_size;
    while (i >= cur) {
      if (shape[i] == 1) {
        shape[i + 1] = 1;
        shape[i] = 0;
      }
      --i;
    }
    shape[cur] = 1;
    ++cur_output_size;
  }

  // Fill the remaining slots with the input extents, in order.
  int in_idx = 0;
  for (auto& extent : shape) {
    if (extent == 0) {
      extent = in_dims[in_idx++];
    }
  }
  return DDim(shape);
}
// Reference (host-side) unsqueeze used to validate the NPU bridge.
//
// Looks up the tensors named "x" and "out_ref" in the op's scope (these names
// are fixed here, not taken from the op desc), resizes "out_ref" to the shape
// returned by GetOutputShape for the op's "axes" attribute, and copies the
// input elements over unchanged.
//
// Template parameter:
//   dtype - element type of both tensors.
template <typename dtype>
void unsqueeze_ref(const std::shared_ptr<operators::UnsqueezeOp> op) {
  Scope* scope = op->scope();
  const OpInfo* op_info = op->op_info();
  auto x = scope->FindTensor("x");
  auto out = scope->FindMutableTensor("out_ref");
  auto axes = op_info->GetAttr<std::vector<int>>("axes");
  auto y_dims = GetOutputShape(axes, x->dims());
  out->Resize(y_dims);
  auto x_data = x->data<dtype>();
  auto out_data = out->mutable_data<dtype>();
  // The byte count must follow the element type: the previous hard-coded
  // sizeof(float) copied the wrong amount for any dtype that is not 4 bytes.
  memcpy(out_data, x_data, x->numel() * sizeof(dtype));
}
void test_unsqueeze(const std::vector<int64_t>& input_shape,
std::vector<int> axes) {
// prepare input&output variables
Scope scope;
std::string x_var_name = "x";
std::string out_var_name = "out";
std::string out_ref_var_name = "out_ref";
auto* x = scope.NewTensor(x_var_name);
auto* out = scope.NewTensor(out_var_name);
auto* out_ref = scope.NewTensor(out_ref_var_name);
x->Resize(input_shape);
// initialize input&output data
FillTensor<float>(x);
// initialize op desc
cpp::OpDesc opdesc;
opdesc.SetType("unsqueeze");
opdesc.SetInput("X", {x_var_name});
opdesc.SetOutput("Out", {out_var_name});
opdesc.SetAttr("axes", axes);
// create and convert op to NPU model, then run it on NPU
auto op = CreateOp<operators::UnsqueezeOp>(opdesc, &scope);
LauchOp(op, {x_var_name}, {out_var_name});
// execute reference implementation and save to output tensor
unsqueeze_ref<float>(op);
// compare results
CHECK_EQ(out->dims().size(), out_ref->dims().size());
for (int i = 0; i < out->dims().size(); i++) {
CHECK_EQ(out->dims()[i], out_ref->dims()[i]);
}
auto* out_data = out->mutable_data<float>();
auto* out_ref_data = out_ref->mutable_data<float>();
for (int i = 0; i < out->dims().production(); i++) {
EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-2);
}
}
// Exercises unsqueeze on a few (input shape, axes) pairs, run in the order
// listed.
TEST(NPUBridges, unsqueeze) {
  struct Case {
    std::vector<int64_t> input_shape;
    std::vector<int> axes;
  };
  const std::vector<Case> cases = {
      {{2}, {0, 2}},
      {{2, 3}, {1, 3}},
      {{1, 2, 3}, {3}},
      {{5, 6, 7}, {1}},
  };
  for (const auto& c : cases) {
    test_unsqueeze(c.input_shape, c.axes);
  }
}
} // namespace bridges
} // namespace npu
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_OP(unsqueeze);
USE_NPU_BRIDGE(unsqueeze);
if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH_ARM))
lite_cc_test(test_kernel_scale_compute SRCS scale_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_power_compute SRCS power_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_shuffle_channel_compute SRCS shuffle_channel_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_shuffle_channel_compute SRCS shuffle_channel_compute_test.cc DEPS arena_framework ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_yolo_box_compute SRCS yolo_box_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_fc_compute SRCS fc_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_elementwise_compute SRCS elementwise_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
......@@ -25,19 +25,20 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH
#lite_cc_test(test_kernel_write_to_array_compute SRCS write_to_array_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_read_from_array_compute SRCS read_from_array_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_concat_compute SRCS concat_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${npu_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_transpose_compute SRCS transpose_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reshape_compute SRCS reshape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_mul_compute SRCS mul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_batch_norm_compute SRCS batch_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${npu_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_batch_norm_compute SRCS batch_norm_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_pool_compute SRCS pool_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
if(LITE_BUILD_EXTRA)
lite_cc_test(test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_unsqueeze_compute SRCS unsqueeze_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_unsqueeze_compute SRCS unsqueeze_compute_test.cc DEPS arena_framework ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_assign_compute SRCS assign_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_assign_value_compute SRCS assign_value_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_box_clip_compute SRCS box_clip_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/core/arena/framework.h"
#include "lite/tests/utils/fill_data.h"
namespace paddle {
namespace lite {
// Arena test case for the pool2d op.
//
// Holds one pooling configuration, describes it to the op under test
// (PrepareOpDesc), fills the input with random data in [-1, 1]
// (PrepareData), and computes the expected output on the host (RunBaseline).
//
// The baseline indexes dims_[0..3], so it expects a 4-D input. It implements
// "max" and "avg" pooling; for adaptive_ it only derives the output shape and
// zeroes the paddings, it does not compute adaptive windows.
class PoolComputeTest : public arena::TestCase {
 protected:
  // common attributes for this op.
  std::string op_type_ = "pool2d";
  std::string x_ = "x";
  std::string out_ = "out";
  DDim dims_{{1, 2, 3, 4}};
  std::string pooling_type_ = "max";
  bool global_pooling_ = false;
  std::vector<int> strides_{1, 1};
  std::vector<int> paddings_{0, 0};
  std::vector<int> ksize_{2, 2};
  bool exclusive_ = true;
  bool ceil_mode_ = false;
  bool adaptive_ = false;
  std::string padding_algorithm_;

 public:
  // Parameters mirror the pool2d attributes of the same names; `dims` is the
  // input shape. `paddings` may hold 1, 2 or 4 entries (see RunBaseline).
  PoolComputeTest(const Place& place,
                  const std::string& alias,
                  DDim dims,
                  std::string pooling_type,
                  bool global_pooling,
                  std::vector<int> strides = {1, 1},
                  std::vector<int> paddings = {0, 0},
                  std::vector<int> ksize = {2, 2},
                  bool exclusive = true,
                  bool ceil_mode = false,
                  bool adaptive = false,
                  std::string padding_algorithm = "")
      : TestCase(place, alias),
        dims_(dims),
        pooling_type_(pooling_type),
        global_pooling_(global_pooling),
        strides_(strides),
        paddings_(paddings),
        ksize_(ksize),
        exclusive_(exclusive),
        ceil_mode_(ceil_mode),
        adaptive_(adaptive),
        // Previously dropped: without this the "SAME"/"VALID" cases ran with
        // an empty algorithm and never reached the op or the baseline.
        padding_algorithm_(padding_algorithm) {}

  // Computes the expected output into a new tensor named out_ in `scope`,
  // reading the input tensor named x_ from the same scope.
  void RunBaseline(Scope* scope) override {
    // Normalize paddings to four entries: a single value is replicated, and
    // {a, b} is expanded to {a, a, b, b}.
    std::vector<int> paddings_new{paddings_};
    if (paddings_new.size() == 1L) {
      paddings_new = std::vector<int>(4, paddings_new[0]);
    } else if (paddings_new.size() == 2L) {
      paddings_new.insert(paddings_new.begin(), paddings_new[0]);
      paddings_new.insert(paddings_new.begin() + 2, paddings_new[2]);
    }
    CHECK_EQ(paddings_new.size(), 4L);

    // "SAME": derive per-axis paddings so that the output extent becomes
    // ceil(input / stride); the smaller half goes first.
    if (padding_algorithm_ == "SAME") {
      for (size_t i = 0; i < strides_.size(); ++i) {
        int out_size = (dims_[i + 2] + strides_[i] - 1) / strides_[i];
        int pad_sum =
            std::max((out_size - 1) * strides_[i] + ksize_[i] - dims_[i + 2],
                     static_cast<int64_t>(0));
        int pad_0 = pad_sum / 2;
        int pad_1 = pad_sum - pad_0;
        *(paddings_new.begin() + i * 2) = pad_0;
        *(paddings_new.begin() + i * 2 + 1) = pad_1;
      }
    }
    // "VALID", global and adaptive pooling all ignore the paddings.
    if (padding_algorithm_ == "VALID" || global_pooling_ || adaptive_) {
      for (size_t i = 0; i < paddings_new.size(); i++) {
        paddings_new[i] = 0;
      }
    }

    // Global pooling uses the whole spatial plane as its window.
    std::vector<int> ksize_new{ksize_};
    if (global_pooling_) {
      ksize_new.clear();
      ksize_new.push_back(dims_[2]);
      ksize_new.push_back(dims_[3]);
    }

    // Output shape: {N, C} followed by the spatial extents.
    std::vector<int64_t> out_shape{dims_[0], dims_[1]};
    if (adaptive_) {
      out_shape.insert(out_shape.end(), ksize_new.begin(), ksize_new.end());
    } else {
      for (size_t i = 0; i < ksize_new.size(); ++i) {
        int out_size;
        if (!ceil_mode_) {
          out_size = (dims_[i + 2] - ksize_new[i] + paddings_new[2 * i] +
                      paddings_new[2 * i + 1]) /
                         strides_[i] +
                     1;
        } else {
          // Adding (stride - 1) before dividing rounds the quotient up.
          out_size = (dims_[i + 2] - ksize_new[i] + paddings_new[2 * i] +
                      paddings_new[2 * i + 1] + strides_[i] - 1) /
                         strides_[i] +
                     1;
        }
        out_shape.push_back(out_size);
      }
    }

    auto out = scope->NewTensor(out_);
    CHECK(out);
    out->Resize(DDim(out_shape));
    auto out_dims = out->dims();
    auto dst_ptr = out->mutable_data<float>();

    auto x = scope->FindTensor(x_);
    auto src_ptr = x->data<float>();

    int in_n = dims_[0];
    int in_c = dims_[1];
    int in_h = dims_[2];
    int in_w = dims_[3];
    int size_in_n = in_c * in_h * in_w;
    int size_in_c = in_h * in_w;

    int out_h = out_dims[2];
    int out_w = out_dims[3];
    int size_out_n = in_c * out_h * out_w;
    int size_out_c = out_h * out_w;

    int window_h = ksize_new[0];
    int window_w = ksize_new[1];
    int stride_h = strides_[0];
    int stride_w = strides_[1];
    // Only the leading padding of each axis shifts the window origin.
    int pad_t = paddings_new[0];
    int pad_l = paddings_new[2];

    if (global_pooling_) {
      // One value per (n, c): reduce the whole spatial plane.
      for (int n = 0; n < in_n; ++n) {
        for (int c = 0; c < in_c; ++c) {
          const float* src = src_ptr + n * size_in_n + c * size_in_c;
          float res = src[0];
          if (pooling_type_ == "max") {
            for (int i = 1; i < size_in_c; ++i) {
              float cur_val = src[i];
              res = cur_val > res ? cur_val : res;
            }
          } else if (pooling_type_ == "avg") {
            for (int i = 1; i < size_in_c; ++i) {
              float cur_val = src[i];
              res += cur_val;
            }
            res /= size_in_c;
          }
          dst_ptr[n * size_out_n + c] = res;
        }
      }
    } else {
      for (int n = 0; n < in_n; ++n) {
        for (int c = 0; c < in_c; ++c) {
          for (int h = 0; h < out_h; ++h) {
            // Window rows, clamped to the input extent.
            int sh = h * stride_h;
            int eh = sh + window_h;
            sh = (sh - pad_t) < 0 ? 0 : sh - pad_t;
            eh = (eh - pad_t) > in_h ? in_h : eh - pad_t;
            for (int w = 0; w < out_w; ++w) {
              // Window columns, clamped to the input extent.
              int sw = w * stride_w;
              int ew = sw + window_w;
              sw = (sw - pad_l) < 0 ? 0 : sw - pad_l;
              ew = (ew - pad_l) > in_w ? in_w : ew - pad_l;
              int pooling_size = (ew - sw) * (eh - sh);
              // An empty window leaves the output element untouched.
              if (pooling_size == 0) continue;
              float res = 0.f;
              for (int kh = sh; kh < eh; ++kh) {
                for (int kw = sw; kw < ew; ++kw) {
                  int src_idx = n * size_in_n + c * size_in_c + kh * in_w + kw;
                  if (kh == sh && kw == sw) {
                    // First element of the window seeds the accumulator.
                    res = src_ptr[src_idx];
                  } else {
                    if (pooling_type_ == "max") {
                      res = res >= src_ptr[src_idx] ? res : src_ptr[src_idx];
                    }
                    if (pooling_type_ == "avg") {
                      res += src_ptr[src_idx];
                    }
                  }
                }
              }
              if (pooling_type_ == "avg") {
                // exclusive: divide by the clamped window size; otherwise by
                // the nominal kernel size.
                if (exclusive_) {
                  res /= pooling_size;
                } else {
                  res /= window_h * window_w;
                }
              }
              dst_ptr[n * size_out_n + c * size_out_c + h * out_w + w] = res;
            }
          }
        }
      }
    }
  }

  // Writes the stored configuration into `op_desc`. "padding_algorithm" is
  // only set when it is non-empty.
  void PrepareOpDesc(cpp::OpDesc* op_desc) {
    op_desc->SetType(op_type_);
    op_desc->SetInput("X", {x_});
    op_desc->SetOutput("Out", {out_});
    op_desc->SetAttr("pooling_type", pooling_type_);
    op_desc->SetAttr("global_pooling", global_pooling_);
    op_desc->SetAttr("strides", strides_);
    op_desc->SetAttr("paddings", paddings_);
    op_desc->SetAttr("ksize", ksize_);
    op_desc->SetAttr("exclusive", exclusive_);
    op_desc->SetAttr("ceil_mode", ceil_mode_);
    op_desc->SetAttr("adaptive", adaptive_);
    if (!padding_algorithm_.empty()) {
      op_desc->SetAttr("padding_algorithm", padding_algorithm_);
    }
  }

  // Registers the input tensor x_ with shape dims_, filled with random values
  // in [-1, 1].
  void PrepareData() override {
    std::vector<float> din(dims_.production());
    fill_data_rand(din.data(), -1.f, 1.f, dims_.production());
    SetCommonTensor(x_, dims_, din.data());
  }
};
// Checks global pooling ("max" and "avg") on a {2, 3, 4, 5} input, with all
// other pool attributes left at the PoolComputeTest defaults.
void TestPoolGlobal(Place place, float abs_error = 2e-5) {
  const std::vector<std::vector<int64_t>> shapes{{2, 3, 4, 5}};
  const std::vector<std::string> pooling_types{"max", "avg"};
  for (const auto& shape : shapes) {
    for (const auto& pooling_type : pooling_types) {
      std::unique_ptr<arena::TestCase> tester(
          new PoolComputeTest(place,
                              "def",
                              DDim(shape),
                              pooling_type,
                              /*global_pooling=*/true));
      arena::Arena arena(std::move(tester), place, abs_error);
      arena.TestPrecision();
    }
  }
}
// Checks pooling with the "SAME" and "VALID" padding algorithms for both
// pooling types on a {2, 3, 4, 5} input, using stride 2x2 and kernel 2x2.
void TestPoolAlgorithm(Place place, float abs_error = 2e-5) {
  const std::vector<std::vector<int64_t>> shapes{{2, 3, 4, 5}};
  for (const auto& shape : shapes) {
    for (const char* pooling_type : {"max", "avg"}) {
      for (const char* padding_algorithm : {"SAME", "VALID"}) {
        auto* test_case = new PoolComputeTest(place,
                                              "def",
                                              DDim(shape),
                                              pooling_type,
                                              /*global_pooling=*/false,
                                              /*strides=*/{2, 2},
                                              /*paddings=*/{0, 0},
                                              /*ksize=*/{2, 2},
                                              /*exclusive=*/true,
                                              /*ceil_mode=*/false,
                                              /*adaptive=*/false,
                                              padding_algorithm);
        std::unique_ptr<arena::TestCase> tester(test_case);
        arena::Arena arena(std::move(tester), place, abs_error);
        arena.TestPrecision();
      }
    }
  }
}
// Runs one non-global pooling precision check with the given shape, pooling
// type, strides, paddings and kernel size; all remaining attributes keep the
// PoolComputeTest defaults.
void TestPoolHelper(Place place,
                    float abs_error,
                    std::vector<int64_t> dims,
                    std::string pooling_type,
                    std::vector<int> strides,
                    std::vector<int> paddings,
                    std::vector<int> ksize) {
  auto* test_case = new PoolComputeTest(place,
                                        "def",
                                        DDim(dims),
                                        pooling_type,
                                        /*global_pooling=*/false,
                                        strides,
                                        paddings,
                                        ksize);
  std::unique_ptr<arena::TestCase> tester(test_case);
  arena::Arena arena(std::move(tester), place, abs_error);
  arena.TestPrecision();
}
// Checks several stride combinations on a {2, 3, 6, 7} input with no padding
// and a 2x2 kernel, for both pooling types.
void TestPoolStrides(Place place, float abs_error = 2e-5) {
  const std::vector<std::vector<int>> stride_cases{{1, 1}, {1, 2}, {2, 2}};
  for (const char* pooling_type : {"max", "avg"}) {
    for (const auto& strides : stride_cases) {
      TestPoolHelper(place,
                     abs_error,
                     {2, 3, 6, 7},
                     pooling_type,
                     strides,
                     {0, 0},
                     {2, 2});
    }
  }
}
// Checks 2-entry and 4-entry padding specifications on a {2, 3, 6, 7} input
// with stride 1x1 and a 2x2 kernel, for both pooling types.
void TestPoolPaddings(Place place, float abs_error = 2e-5) {
  const std::vector<std::vector<int>> padding_cases{
      {0, 0},
      {1, 1},
      {0, 0, 1, 1},
      {1, 0, 1, 0},
      {1, 0, 0, 1},
  };
  for (const char* pooling_type : {"max", "avg"}) {
    for (const auto& paddings : padding_cases) {
      TestPoolHelper(place,
                     abs_error,
                     {2, 3, 6, 7},
                     pooling_type,
                     {1, 1},
                     paddings,
                     {2, 2});
    }
  }
}
// Checks square kernels of size 2 and 3 on a {2, 3, 6, 7} input, each once
// with stride 1 / no padding and once with stride 2 / padding 1, for both
// pooling types.
void TestPoolKsize(Place place, float abs_error = 2e-5) {
  for (const char* pooling_type : {"max", "avg"}) {
    for (int edge : {2, 3}) {
      const std::vector<int> kernel{edge, edge};
      TestPoolHelper(
          place, abs_error, {2, 3, 6, 7}, pooling_type, {1, 1}, {0, 0}, kernel);
      TestPoolHelper(
          place, abs_error, {2, 3, 6, 7}, pooling_type, {2, 2}, {1, 1}, kernel);
    }
  }
}
// Entry point for the pool2d precision checks. The checks only run when the
// build defines LITE_WITH_NPU; any other build returns immediately after
// logging.
TEST(Pool, precision) {
  LOG(INFO) << "test pool op";
  Place place;
  float abs_error = 2e-5;
#ifndef LITE_WITH_NPU
  return;
#else
  place = TARGET(kNPU);
  abs_error = 1e-2;  // Using fp16 in NPU
#endif

  TestPoolGlobal(place, abs_error);
  TestPoolAlgorithm(place, abs_error);
  TestPoolStrides(place, abs_error);
  TestPoolPaddings(place, abs_error);
  TestPoolKsize(place, abs_error);
}
} // namespace lite
} // namespace paddle
......@@ -12,12 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// TODO(FrostML): shuffle_channel fails on CI but passes on a local machine.
// Re-enable this test once the CI failure is resolved.
/*#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/core/arena/framework.h"
#include "lite/tests/utils/fill_data.h"
namespace paddle {
namespace lite {
......@@ -40,28 +39,29 @@ class ShuffleChannelComputeTester : public arena::TestCase {
auto* out = scope->NewTensor(output_);
CHECK(out);
out->Resize(dims_);
auto* outputs = out->mutable_data<float>();
auto* out_data = out->mutable_data<float>();
auto* x = scope->FindTensor(input_);
const auto* inputs = x->data<float>();
DDim x_dims = x->dims();
int num = x->dims()[0];
int channel = x->dims()[1];
int height = x->dims()[2];
int width = x->dims()[3];
int fea_size = channel * height * width;
const auto* in_data = x->data<float>();
int num = dims_[0];
int channel = dims_[1];
int height = dims_[2];
int width = dims_[3];
int feather_size = channel * height * width;
int spatial_size = height * width;
int group_row = group_;
int group_col = channel / group_;
for (int k = 0; k < num; ++k) {
inputs += k * fea_size;
outputs += k * fea_size;
for (int i = 0; i < group_row; ++i) {
for (int j = 0; j < group_col; ++j) {
const float* p_i = inputs + (i * group_col + j) * spatial_size;
float* p_o = outputs + (j * group_row + i) * spatial_size;
int group_num = group_;
int group_size = channel / group_;
for (int n = 0; n < num; n++) {
for (int i = 0; i < group_num; ++i) {
for (int j = 0; j < group_size; ++j) {
const float* p_i = in_data + (i * group_size + j) * spatial_size;
float* p_o = out_data + (j * group_num + i) * spatial_size;
memcpy(p_o, p_i, spatial_size * sizeof(float));
}
}
in_data += feather_size;
out_data += feather_size;
}
}
......@@ -73,35 +73,33 @@ class ShuffleChannelComputeTester : public arena::TestCase {
}
void PrepareData() override {
std::vector<float> data(dims_.production());
for (int i = 0; i < dims_.production(); i++) {
data[i] = i * 1.1;
}
SetCommonTensor(input_, dims_, data.data());
std::vector<float> din(dims_.production());
fill_data_rand(din.data(), -1.f, 1.f, dims_.production());
SetCommonTensor(input_, dims_, din.data());
}
};
void test_shuffle_channel(Place place) {
for (int group : {4}) {
void test_shuffle_channel(Place place, float abs_error = 2e-5) {
for (int group : {2, 4, 8}) {
std::unique_ptr<arena::TestCase> tester(
new ShuffleChannelComputeTester(place, "def", group));
arena::Arena arena(std::move(tester), place, 2e-5);
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
}
TEST(ShuffleChannel, precision) {
// #ifdef LITE_WITH_X86
// Place place(TARGET(kX86));
// #endif
#ifdef LITE_WITH_ARM
Place place(TARGET(kARM));
test_shuffle_channel(place);
Place place;
float abs_error = 2e-5;
#ifdef LITE_WITH_NPU
place = TARGET(kNPU);
abs_error = 1e-2; // Using fp16 in NPU
#else
return;
#endif
test_shuffle_channel(place, abs_error);
}
} // namespace lite
} // namespace paddle
*/
......@@ -223,67 +223,73 @@ class Unsqueeze2ComputeTester : public arena::TestCase {
}
};
void test_unsqueeze(Place place) {
void test_unsqueeze(Place place, float abs_error = 2e-5) {
for (std::vector<int> axes : {std::vector<int>({1}),
std::vector<int>({0, 2}),
std::vector<int>({0, -2})}) {
for (int N : {1}) {
for (int C : {3}) {
for (int H : {1}) {
for (int W : {5}) {
for (int input_axes_flag : {1, 2, 3}) {
LOG(INFO) << N << " " << C << " " << H << " " << W << " "
<< input_axes_flag;
std::unique_ptr<arena::TestCase> tester(
new UnsqueezeComputeTester(
place, "def", axes, DDim({N, C, H, W}), input_axes_flag));
arena::Arena arena(std::move(tester), place, 2e-5);
arena.TestPrecision();
}
}
}
for (auto dims : std::vector<std::vector<int64_t>>{{3}, {3, 5}, {3, 5, 7}})
for (int input_axes_flag : {1, 2, 3}) {
#ifdef LITE_WITH_NPU
if (input_axes_flag != 1) continue;
if (dims.size() + axes.size() > 4) continue;
#endif
std::unique_ptr<arena::TestCase> tester(new UnsqueezeComputeTester(
place, "def", axes, DDim(dims), input_axes_flag));
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision();
}
}
}
}
void test_unsqueeze2(Place place) {
void test_unsqueeze2(Place place,
float abs_error = 2e-5,
std::vector<std::string> ignored_outs = {}) {
for (std::vector<int> axes : {std::vector<int>({0}),
std::vector<int>({0, 2}),
std::vector<int>({0, -2})}) {
for (int N : {1}) {
for (int C : {3}) {
for (int H : {1}) {
for (int W : {5}) {
std::unique_ptr<arena::TestCase> tester(new Unsqueeze2ComputeTester(
place, "def", axes, DDim({N, C, H, W})));
arena::Arena arena(std::move(tester), place, 2e-5);
arena.TestPrecision();
}
}
}
for (auto dims :
std::vector<std::vector<int64_t>>{{3}, {3, 5}, {3, 5, 7}}) {
#ifdef LITE_WITH_NPU
if (dims.size() + axes.size() > 4) continue;
#endif
std::unique_ptr<arena::TestCase> tester(
new Unsqueeze2ComputeTester(place, "def", axes, DDim(dims)));
arena::Arena arena(std::move(tester), place, abs_error);
arena.TestPrecision(ignored_outs);
}
}
}
TEST(squeeze, precision) {
#ifdef LITE_WITH_X86
Place place(TARGET(kX86));
#endif
#ifdef LITE_WITH_ARM
Place place(TARGET(kARM));
test_unsqueeze(place);
Place place;
float abs_error = 2e-5;
#ifdef LITE_WITH_NPU
place = TARGET(kNPU);
abs_error = 1e-2; // Using fp16 in NPU
#elif defined(LITE_WITH_ARM)
place = TARGET(kARM);
#else
return;
#endif
test_unsqueeze(place, abs_error);
}
TEST(squeeze2, precision) {
#ifdef LITE_WITH_X86
Place place(TARGET(kX86));
#endif
#ifdef LITE_WITH_ARM
Place place(TARGET(kARM));
test_unsqueeze2(place);
Place place;
float abs_error = 2e-5;
std::vector<std::string> ignored_outs = {};
#ifdef LITE_WITH_NPU
place = TARGET(kNPU);
abs_error = 1e-2; // Using fp16 in NPU
ignored_outs.push_back("XShape"); // not supported out in NPU
#elif defined(LITE_WITH_ARM)
place = TARGET(kARM);
#else
return;
#endif
test_unsqueeze2(place, abs_error, ignored_outs);
}
} // namespace lite
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册