diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt index 3d6eb9eb8ace0dde1fba92af88ab3af20a87a2ed..c55f62c02977cec54b1ef679a038e06cb576b6b8 100644 --- a/lite/tests/kernels/CMakeLists.txt +++ b/lite/tests/kernels/CMakeLists.txt @@ -44,6 +44,7 @@ if(LITE_BUILD_EXTRA) lite_cc_test(test_kernel_assign_value_compute SRCS assign_value_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_box_clip_compute SRCS box_clip_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_reduce_mean_compute SRCS reduce_mean_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_reduce_sum_compute SRCS reduce_sum_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_reduce_prod_compute SRCS reduce_prod_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_stack_compute SRCS stack_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_range_compute SRCS range_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) diff --git a/lite/tests/kernels/reduce_sum_compute_test.cc b/lite/tests/kernels/reduce_sum_compute_test.cc new file mode 100644 index 
0000000000000000000000000000000000000000..9cfe213750b1191c1ef8fe7fba1b1c1035c2ae42
--- /dev/null
+++ b/lite/tests/kernels/reduce_sum_compute_test.cc
@@ -0,0 +1,385 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+#include "lite/api/paddle_use_kernels.h"
+#include "lite/api/paddle_use_ops.h"
+#include "lite/core/arena/framework.h"
+
+namespace paddle {
+namespace lite {
+
+// Reference reducers used to build the expected output. Each one reads `src` as
+// a dense row-major [num_in, channel_in, height_in, width_in] float buffer and
+// writes the sums densely into `dst` with the reduced axis dropped.
+
+// Sums over the N axis; writes channel_in * height_in * width_in values.
+void reduce_sum_n(const float* src,
+                  float* dst,
+                  int num_in,
+                  int channel_in,
+                  int height_in,
+                  int width_in) {
+  int hw_size = height_in * width_in;
+  int chw_size = channel_in * hw_size;
+  int data_index, src_index;
+  for (int c = 0; c < channel_in; ++c) {
+    for (int h = 0; h < height_in; ++h) {
+      for (int w = 0; w < width_in; ++w) {
+        data_index = c * hw_size + h * width_in + w;
+        dst[data_index] = 0.0;
+        for (int n = 0; n < num_in; ++n) {
+          src_index = n * chw_size + data_index;
+          dst[data_index] += static_cast<float>(src[src_index]);
+        }
+      }
+    }
+  }
+}
+
+// Sums over the C axis; writes num_in * height_in * width_in values.
+void reduce_sum_c(const float* src,
+                  float* dst,
+                  int num_in,
+                  int channel_in,
+                  int height_in,
+                  int width_in) {
+  int hw_size = height_in * width_in;
+  int chw_size = hw_size * channel_in;
+  int data_index, src_index0, src_index;
+  for (int n = 0; n < num_in; ++n) {
+    for (int h = 0; h < height_in; ++h) {
+      for (int w = 0; w < width_in; ++w) {
+        data_index = n * hw_size + h * width_in + w;
+        src_index0 = n * chw_size + h * width_in + w;
+        dst[data_index] = 0.0;
+        for (int c = 0; c < channel_in; ++c) {
+          src_index = src_index0 + c * hw_size;
+          dst[data_index] += static_cast<float>(src[src_index]);
+        }
+      }
+    }
+  }
+}
+
+// Sums over the H axis; writes num_in * channel_in * width_in values.
+void reduce_sum_h(const float* src,
+                  float* dst,
+                  int num_in,
+                  int channel_in,
+                  int height_in,
+                  int width_in) {
+  int cw_size = channel_in * width_in;
+  int chw_size = cw_size * height_in;
+  int hw_size = height_in * width_in;
+  int data_index, src_index, src_index0;
+  for (int n = 0; n < num_in; ++n) {
+    for (int c = 0; c < channel_in; ++c) {
+      for (int w = 0; w < width_in; ++w) {
+        data_index = n * cw_size + c * width_in + w;
+        src_index0 = n * chw_size + c * hw_size + w;
+        dst[data_index] = 0.0;
+        for (int h = 0; h < height_in; ++h) {
+          src_index = src_index0 + h * width_in;
+          dst[data_index] += static_cast<float>(src[src_index]);
+        }
+      }
+    }
+  }
+}
+
+// Sums over the W axis; writes num_in * channel_in * height_in values.
+void reduce_sum_w(const float* src,
+                  float* dst,
+                  int num_in,
+                  int channel_in,
+                  int height_in,
+                  int width_in) {
+  int ch_size = channel_in * height_in;
+  int hw_size = height_in * width_in;
+  int chw_size = ch_size * width_in;
+  int data_index = 0;
+  int src_index0 = 0;
+  int src_index = 0;
+  for (int n = 0; n < num_in; ++n) {
+    for (int c = 0; c < channel_in; ++c) {
+      for (int h = 0; h < height_in; ++h) {
+        data_index = n * ch_size + c * height_in + h;
+        src_index0 = n * chw_size + c * hw_size + h * width_in;
+        dst[data_index] = 0.0;
+        for (int w = 0; w < width_in; ++w) {
+          src_index = src_index0 + w;
+          dst[data_index] += static_cast<float>(src[src_index]);
+        }
+      }
+    }
+  }
+}
+
+// Sums every element of `src` into dst[0].
+void reduce_sum_all(const float* src,
+                    float* dst,
+                    int num_in,
+                    int channel_in,
+                    int height_in,
+                    int width_in) {
+  float sum = 0.0;
+  int src_index;
+  int n_id, c_id;
+  for (int n = 0; n < num_in; ++n) {
+    n_id = n * channel_in * height_in * width_in;
+    for (int c = 0; c < channel_in; ++c) {
+      c_id = c * height_in * width_in;
+      for (int h = 0; h < height_in; ++h) {
+        for (int w = 0; w < width_in; ++w) {
+          src_index = n_id + c_id + h * width_in + w;
+          sum = sum + src[src_index];
+        }
+      }
+    }
+  }
+  dst[0] = sum;
+}
+
+// Sums over the N and C axes; writes height_in * width_in values.
+void reduce_sum_nc(const float* src,
+                   float* dst,
+                   int num_in,
+                   int channel_in,
+                   int height_in,
+                   int width_in) {
+  // reduce n first.
+  DDimLite ddimA({1, channel_in, height_in, width_in});
+  lite::Tensor tensor_tmp;
+  tensor_tmp.Resize(ddimA);
+  float* tmp_out = tensor_tmp.mutable_data<float>();
+  reduce_sum_n(src, tmp_out, num_in, channel_in, height_in, width_in);
+  reduce_sum_c(tmp_out, dst, 1, channel_in, height_in, width_in);
+}
+
+// Sums over the C and H axes; writes num_in * width_in values.
+void reduce_sum_ch(const float* src,
+                   float* dst,
+                   int num_in,
+                   int channel_in,
+                   int height_in,
+                   int width_in) {
+  // reduce c first
+  DDimLite ddimA({num_in, 1, height_in, width_in});
+  lite::Tensor tensor_tmp;
+  tensor_tmp.Resize(ddimA);
+  float* tmp_out = tensor_tmp.mutable_data<float>();
+  reduce_sum_c(src, tmp_out, num_in, channel_in, height_in, width_in);
+  reduce_sum_h(tmp_out, dst, num_in, 1, height_in, width_in);
+}
+
+// Sums over the H and W axes; writes num_in * channel_in values.
+void reduce_sum_hw(const float* src,
+                   float* dst,
+                   int num_in,
+                   int channel_in,
+                   int height_in,
+                   int width_in) {
+  // reduce h first
+  DDimLite ddimA({num_in, channel_in, 1, width_in});
+  lite::Tensor tensor_tmp;
+  tensor_tmp.Resize(ddimA);
+  float* tmp_out = tensor_tmp.mutable_data<float>();
+  reduce_sum_h(src, tmp_out, num_in, channel_in, height_in, width_in);
+  reduce_sum_w(tmp_out, dst, num_in, channel_in, 1, width_in);
+}
+
+// Arena test case for the reduce_sum op on a 4-D float input. RunBaseline()
+// computes the expected output with the reference reducers above.
+class ReduceSumComputeTester : public arena::TestCase {
+ protected:
+  // common attributes for this op.
+  std::string input_ = "x";
+  std::string output_ = "out";
+  std::vector<int> dim_{0};
+  bool keep_dim_ = false;
+  bool reduce_all_ = false;
+  DDim x_dims_{{3, 2, 3, 4}};
+
+ public:
+  ReduceSumComputeTester(const Place& place,
+                         const std::string& alias,
+                         std::vector<int> dim,
+                         bool keep_dim,
+                         bool reduce_all,
+                         DDim x_dims)
+      : TestCase(place, alias),
+        dim_(dim),
+        keep_dim_(keep_dim),
+        reduce_all_(reduce_all),
+        x_dims_(x_dims) {}
+
+  // Builds the expected output tensor for the configured dim_, keep_dim_ and
+  // reduce_all_ values.
+  void RunBaseline(Scope* scope) override {
+    auto* x = scope->FindMutableTensor(input_);
+    const auto* x_data = x->data<float>();
+    auto* out = scope->NewTensor(output_);
+    const int x_rank = static_cast<int>(x_dims_.size());
+    if (x_rank != 4) {
+      LOG(FATAL) << "baseline only supports 4-D input, got rank " << x_rank;
+    }
+
+    // Normalize negative axes on a local copy so that dim_, which
+    // PrepareOpDesc() passes to the op, keeps the values the test asked for.
+    std::vector<int> dims = dim_;
+    for (auto& d : dims) {
+      if (d < 0) {
+        d += x_rank;
+      }
+      if (d < 0 || d >= x_rank) {
+        LOG(FATAL) << "reduce dim out of range: " << d;
+      }
+    }
+    std::sort(dims.begin(), dims.end());
+
+    std::vector<int64_t> out_dims;
+    if (reduce_all_) {
+      if (keep_dim_) {
+        out_dims.assign(static_cast<size_t>(x_rank), 1);
+      } else {
+        out_dims.push_back(1);
+      }
+    } else {
+      for (int i = 0; i < x_rank; ++i) {
+        out_dims.push_back(x_dims_[i]);
+      }
+      if (keep_dim_) {
+        for (int d : dims) {
+          out_dims[d] = 1;
+        }
+      } else {
+        // Mark the reduced axes, then drop them in one pass.
+        const int64_t kDelFlag = -2;
+        for (int d : dims) {
+          out_dims[d] = kDelFlag;
+        }
+        out_dims.erase(std::remove(out_dims.begin(), out_dims.end(), kDelFlag),
+                       out_dims.end());
+      }
+    }
+    out->Resize(DDim(out_dims));
+
+    auto* out_data = out->mutable_data<float>();
+    int in_n = x_dims_[0];
+    int in_c = x_dims_[1];
+    int in_h = x_dims_[2];
+    int in_w = x_dims_[3];
+
+    if (reduce_all_) {
+      reduce_sum_all(x_data, out_data, in_n, in_c, in_h, in_w);
+    } else if (dims.size() == 1) {
+      switch (dims[0]) {
+        case 0:
+          reduce_sum_n(x_data, out_data, in_n, in_c, in_h, in_w);
+          break;
+        case 1:
+          reduce_sum_c(x_data, out_data, in_n, in_c, in_h, in_w);
+          break;
+        case 2:
+          reduce_sum_h(x_data, out_data, in_n, in_c, in_h, in_w);
+          break;
+        case 3:
+          reduce_sum_w(x_data, out_data, in_n, in_c, in_h, in_w);
+          break;
+        default:
+          LOG(FATAL) << "error!!!";
+          break;
+      }
+    } else if (dims.size() == 2) {
+      if (dims[0] == 0 && dims[1] == 1) {
+        reduce_sum_nc(x_data, out_data, in_n, in_c, in_h, in_w);
+      } else if (dims[0] == 1 && dims[1] == 2) {
+        reduce_sum_ch(x_data, out_data, in_n, in_c, in_h, in_w);
+      } else if (dims[0] == 2 && dims[1] == 3) {
+        reduce_sum_hw(x_data, out_data, in_n, in_c, in_h, in_w);
+      } else {
+        LOG(FATAL) << "invalid dims_!!";
+      }
+    } else {
+      // No reference reducer exists for this many axes; fail instead of
+      // leaving out_data unwritten.
+      LOG(FATAL) << "unsupported number of reduce dims: " << dims.size();
+    }
+  }
+
+  // Describes the reduce_sum op under test.
+  void PrepareOpDesc(cpp::OpDesc* op_desc) override {
+    op_desc->SetType("reduce_sum");
+    op_desc->SetInput("X", {input_});
+    op_desc->SetOutput("Out", {output_});
+    op_desc->SetAttr("dim", dim_);
+    op_desc->SetAttr("keep_dim", keep_dim_);
+    op_desc->SetAttr("reduce_all", reduce_all_);
+  }
+
+  // Fills the input with 0, 1, 2, ... in storage order.
+  void PrepareData() override {
+    std::vector<float> data(x_dims_.production());
+    for (size_t i = 0; i < data.size(); ++i) {
+      data[i] = static_cast<float>(i);
+    }
+    SetCommonTensor(input_, x_dims_, data.data());
+  }
+};
+
+// Runs the precision check on `place` for every combination of input shape,
+// keep_dim, reduce_all and reduce axes listed below.
+void test_reduce_sum(Place place) {
+  std::vector<std::vector<int>> reduce_dim{
+      {0}, {1}, {2}, {3}, {0, 1}, {1, 2}, {2, 3}, {-2, -1}};
+  for (auto n : {1, 3}) {
+    for (auto c : {1, 2}) {
+      for (auto h : {1, 3}) {
+        for (auto w : {1, 3}) {
+          for (bool keep_dim : {false, true}) {
+            for (bool reduce_all : {false, true}) {
+              for (const auto& dim : reduce_dim) {
+                auto x_dims = DDim(std::vector<int64_t>({n, c, h, w}));
+                std::unique_ptr<arena::TestCase> tester(
+                    new ReduceSumComputeTester(
+                        place, "def", dim, keep_dim, reduce_all, x_dims));
+                arena::Arena arena(std::move(tester), place, 2e-5);
+                arena.TestPrecision();
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(ReduceSum, precision) {
+#ifdef LITE_WITH_X86
+  Place place(TARGET(kX86));
+  test_reduce_sum(place);
+#endif
+  // #ifdef LITE_WITH_ARM
+  // Place place(TARGET(kARM));
+  // test_reduce_sum(place);
+  // #endif
+}
+
+}  // namespace lite
+}  // namespace paddle