提交 6c9d5b4a 编写于 作者: D dingweihao

[ARM] add expand_as op kernel,test=develop

上级 1aef8141
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
#include "lite/core/profile/basic_profiler.h" #include "lite/core/profile/basic_profiler.h"
#endif // LITE_WITH_PROFILE #endif // LITE_WITH_PROFILE
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
using paddle::lite::profile::Timer; using paddle::lite::profile::Timer;
......
...@@ -7,6 +7,7 @@ add_kernel(squeeze_compute_host Host basic SRCS squeeze_compute.cc DEPS ${lite_k ...@@ -7,6 +7,7 @@ add_kernel(squeeze_compute_host Host basic SRCS squeeze_compute.cc DEPS ${lite_k
add_kernel(unsqueeze_compute_host Host basic SRCS unsqueeze_compute.cc DEPS ${lite_kernel_deps}) add_kernel(unsqueeze_compute_host Host basic SRCS unsqueeze_compute.cc DEPS ${lite_kernel_deps})
add_kernel(multiclass_nms_compute_host Host basic SRCS multiclass_nms_compute.cc DEPS ${lite_kernel_deps}) add_kernel(multiclass_nms_compute_host Host basic SRCS multiclass_nms_compute.cc DEPS ${lite_kernel_deps})
add_kernel(expand_compute_host Host basic SRCS expand_compute.cc DEPS ${lite_kernel_deps}) add_kernel(expand_compute_host Host basic SRCS expand_compute.cc DEPS ${lite_kernel_deps})
add_kernel(expand_as_compute_host Host basic SRCS expand_as_compute.cc DEPS ${lite_kernel_deps})
add_kernel(shape_compute_host Host extra SRCS shape_compute.cc DEPS ${lite_kernel_deps}) add_kernel(shape_compute_host Host extra SRCS shape_compute.cc DEPS ${lite_kernel_deps})
add_kernel(is_empty_compute_host Host extra SRCS is_empty_compute.cc DEPS ${lite_kernel_deps}) add_kernel(is_empty_compute_host Host extra SRCS is_empty_compute.cc DEPS ${lite_kernel_deps})
add_kernel(crf_decoding_compute_host Host extra SRCS crf_decoding_compute.cc DEPS ${lite_kernel_deps}) add_kernel(crf_decoding_compute_host Host extra SRCS crf_decoding_compute.cc DEPS ${lite_kernel_deps})
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/host/expand_as_compute.h"
#include <cstring>
#include <vector>
namespace paddle {
namespace lite {
namespace kernels {
namespace host {
// Tiles X into Out so that Out takes the shape of Target.
//
// For every axis i the repeat count is Target.dims[i] / X.dims[i] (integer
// division). The innermost axis is tiled straight from X into Out; the
// remaining axes are then tiled inside Out itself, from the innermost axis
// outwards. Within each axis the blocks are visited from last to first so a
// block is never overwritten before it has been replicated.
template <typename T, PrecisionType PType>
void ExpandAsCompute<T, PType>::Run() {
  auto& param = this->template Param<operators::ExpandAsParam>();
  const auto* x = param.X;
  auto* out = param.Out;
  const auto* target = param.Target;
  const T* src = x->template data<T>();
  T* dst = out->template mutable_data<T>();

  DDim in_shape = x->dims();
  const int dims = static_cast<int>(target->dims().size());

  // Per-axis repeat counts derived from the two shapes.
  std::vector<int> expand_times;
  expand_times.reserve(dims);
  for (int axis = 0; axis < dims; ++axis) {
    int times = target->dims()[axis] / in_shape[axis];
    expand_times.push_back(times);
  }

  // Innermost axis: replicate every row of X into Out.
  const int last = dims - 1;
  int inner_num = 1;
  inner_num *= in_shape[last];
  {
    const int times = expand_times[last];
    const int outer_num = in_shape.count(0, last);
    for (int j = 0; j < outer_num; ++j) {
      for (int k = 0; k < times; ++k) {
        std::memcpy(dst + (j * times + k) * inner_num,
                    src + j * inner_num,
                    sizeof(T) * inner_num);
      }
    }
    inner_num *= times;
  }

  // Remaining axes: replicate the already-expanded blocks in place.
  for (int axis = dims - 2; axis >= 0; --axis) {
    const int times = expand_times[axis];
    const int outer_num = in_shape.count(0, axis);
    inner_num *= in_shape[axis];
    for (int j = outer_num - 1; j >= 0; --j) {
      const T* block = dst + j * inner_num;
      for (int k = times - 1; k >= 0; --k) {
        T* dest = dst + (j * times + k) * inner_num;
        // Blocks are inner_num-aligned, so source and destination are either
        // disjoint or identical. memcpy must not be called on overlapping
        // ranges, and a self-copy is a no-op anyway, so skip it.
        if (dest == block) {
          continue;
        }
        std::memcpy(dest, block, sizeof(T) * inner_num);
      }
    }
    inner_num *= times;
  }
}
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
// Registers the float instantiation of ExpandAsCompute as the `expand_as`
// kernel (macro arguments: kHost target, kFloat precision, kAny layout, `def`).
// Inputs "X" and "Target" and output "Out" are all bound as host float tensors
// of any data layout.
using expand_as_float =
paddle::lite::kernels::host::ExpandAsCompute<float, PRECISION(kFloat)>;
REGISTER_LITE_KERNEL(expand_as, kHost, kFloat, kAny, expand_as_float, def)
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kFloat),
DATALAYOUT(kAny))})
.BindInput("Target",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kFloat),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kFloat),
DATALAYOUT(kAny))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace host {
// Host kernel for the `expand_as` operator, parameterised on the element type
// T and the precision tag PType. It accepts tensors of any data layout.
template <typename T, PrecisionType PType>
class ExpandAsCompute
    : public KernelLite<TARGET(kHost), PType, DATALAYOUT(kAny)> {
 public:
  virtual ~ExpandAsCompute() = default;

  // Executes the kernel; the definition is provided out of line.
  void Run() override;
};
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
...@@ -34,6 +34,7 @@ add_operator(fake_quant extra SRCS fake_quantize_moving_avg_max_abs.cc DEPS ${op ...@@ -34,6 +34,7 @@ add_operator(fake_quant extra SRCS fake_quantize_moving_avg_max_abs.cc DEPS ${op
add_operator(fake_dequant extra SRCS fake_dequantize_max_abs.cc DEPS ${op_DEPS}) add_operator(fake_dequant extra SRCS fake_dequantize_max_abs.cc DEPS ${op_DEPS})
add_operator(conv_transpose_op basic SRCS conv_transpose_op.cc DEPS ${op_DEPS}) add_operator(conv_transpose_op basic SRCS conv_transpose_op.cc DEPS ${op_DEPS})
add_operator(expand_op_lite basic SRCS expand_op.cc DEPS ${op_DEPS}) add_operator(expand_op_lite basic SRCS expand_op.cc DEPS ${op_DEPS})
add_operator(expand_as_op_lite basic SRCS expand_as_op.cc DEPS ${op_DEPS})
add_operator(squeeze_op_lite basic SRCS squeeze_op.cc DEPS ${op_DEPS}) add_operator(squeeze_op_lite basic SRCS squeeze_op.cc DEPS ${op_DEPS})
add_operator(unsqueeze_op_lite basic SRCS unsqueeze_op.cc DEPS ${op_DEPS}) add_operator(unsqueeze_op_lite basic SRCS unsqueeze_op.cc DEPS ${op_DEPS})
add_operator(stack_op basic SRCS stack_op.cc DEPS ${op_DEPS}) add_operator(stack_op basic SRCS stack_op.cc DEPS ${op_DEPS})
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/expand_as_op.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace operators {
// Validates the operator's tensors.
//
// Requires X, Target and Out to be set, X and Target to have the same rank,
// and that rank to be at most 6. Returns true when every check passes.
// NOTE(review): CHECK_OR_FALSE presumably returns false on failure, while
// CHECK_EQ / CHECK_LE presumably abort with the streamed message; confirm
// against the logging macros.
bool ExpandAsOpLite::CheckShape() const {
  CHECK_OR_FALSE(param_.X);
  CHECK_OR_FALSE(param_.Target);
  CHECK_OR_FALSE(param_.Out);
  const int x_rank = param_.X->dims().size();
  const int target_rank = param_.Target->dims().size();
  // expand_as has no expand_times attribute; the constraint is on the ranks of
  // the two input tensors, so the message names them.
  CHECK_EQ(target_rank, x_rank)
      << "The rank of Input(Target) must be equal to the rank of Input(X).";
  CHECK_LE(param_.X->dims().size(), 6u)
      << "The rank of Input(X) must not be greater than 6.";
  return true;
}
bool ExpandAsOpLite::InferShapeImpl() const {
DDim out_dims(param_.X->dims());
for (size_t i = 0; i < param_.Target->dims().size(); ++i) {
// out_dims[i] *= param_.expand_times[i];
out_dims[i] = param_.Target->dims()[i];
}
param_.Out->Resize(out_dims);
return true;
}
// Resolves the op description's "X", "Target" and "Out" arguments to tensors in
// `scope` and stores them in param_. Only the first variable name of each
// argument is used. Always returns true.
bool ExpandAsOpLite::AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) {
  param_.X = GetVar<lite::Tensor>(scope, opdesc.Input("X").front());
  param_.Out =
      GetMutableVar<lite::Tensor>(scope, opdesc.Output("Out").front());
  param_.Target = GetVar<lite::Tensor>(scope, opdesc.Input("Target").front());
  return true;
}
} // namespace operators
} // namespace lite
} // namespace paddle
// Registers ExpandAsOpLite under the operator type name `expand_as`.
REGISTER_LITE_OP(expand_as, paddle::lite::operators::ExpandAsOpLite);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "lite/core/op_lite.h"
namespace paddle {
namespace lite {
namespace operators {
// Operator definition for `expand_as`: holds the ExpandAsParam that is handed
// to the kernel and declares the shape-checking / attachment hooks.
class ExpandAsOpLite : public OpLite {
 public:
  ExpandAsOpLite() {}
  explicit ExpandAsOpLite(const std::string &op_type) : OpLite(op_type) {}

  // Shape validation and inference; defined out of line.
  bool CheckShape() const override;
  bool InferShapeImpl() const override;

  // Binds the op description's variables to param_; defined out of line.
  bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;

  // Passes this operator's parameters to the given kernel.
  void AttachKernel(KernelBase *kernel) override {
    kernel->SetParam(param_);
  }

  std::string DebugString() const override {
    return "expand_as";
  }

 private:
  // Mutable so the const hooks above can resize the output tensor through it.
  mutable ExpandAsParam param_;
};
} // namespace operators
} // namespace lite
} // namespace paddle
...@@ -1278,6 +1278,13 @@ struct ExpandParam : ParamBase { ...@@ -1278,6 +1278,13 @@ struct ExpandParam : ParamBase {
std::vector<int> expand_times{}; std::vector<int> expand_times{};
}; };
/// ----------------------- expand as operators ----------------------
// Parameters of the `expand_as` operator. All pointers start out null.
struct ExpandAsParam : ParamBase {
  // Tensor to be expanded.
  const lite::Tensor* X{nullptr};
  // Tensor whose shape the output takes.
  const lite::Tensor* Target{nullptr};
  // Expanded result.
  lite::Tensor* Out{nullptr};
};
/// ----------------------- matmul operators ---------------------- /// ----------------------- matmul operators ----------------------
struct MatMulParam : ParamBase { struct MatMulParam : ParamBase {
const lite::Tensor* X{}; const lite::Tensor* X{};
......
...@@ -86,6 +86,7 @@ endif() ...@@ -86,6 +86,7 @@ endif()
lite_cc_test(test_kernel_squeeze_compute SRCS squeeze_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_squeeze_compute SRCS squeeze_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_slice_compute SRCS slice_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_slice_compute SRCS slice_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_expand_compute SRCS expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_expand_compute SRCS expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_expand_as_compute SRCS expand_as_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_matmul_compute SRCS matmul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_matmul_compute SRCS matmul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_crf_decoding_compute SRCS crf_decoding_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_crf_decoding_compute SRCS crf_decoding_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
endif() endif()
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/core/arena/framework.h"
namespace paddle {
namespace lite {
class ExpandAsComputeTester : public arena::TestCase {
protected:
// common attributes for this op.
std::string x_ = "X";
std::string out_ = "Out";
// std::vector<int> expand_times_;
std::string target_ = "Target";
DDim dims_;
DDim target_dims_;
public:
ExpandAsComputeTester(const Place& place,
const std::string& alias,
DDim dims,
DDim target_dims)
: TestCase(place, alias), dims_(dims), target_dims_(target_dims) {}
void RunBaseline(Scope* scope) override {
const auto* input = scope->FindTensor(x_);
CHECK(input);
auto* out = scope->NewTensor(out_);
CHECK(out);
const auto* target = scope->FindTensor(target_);
DDim out_shape(input->dims());
DDim in_shape = input->dims();
std::vector<int> expand_times_;
for (size_t i = 0; i < target->dims().size(); ++i) {
int times = target->dims()[i] / input->dims()[i];
expand_times_.push_back(times);
}
for (size_t i = 0; i < expand_times_.size(); ++i) {
out_shape[i] *= expand_times_[i];
}
out->Resize(out_shape);
float* out_data = out->mutable_data<float>();
const float* input_data = input->data<float>();
std::vector<int> in_stride(in_shape.size(), 1),
out_stride(out_shape.size(), 1);
for (int i = in_shape.size() - 2; i >= 0; --i) {
in_stride[i] = in_shape[i + 1] * in_stride[i + 1];
}
for (int i = out_shape.size() - 2; i >= 0; --i) {
out_stride[i] = out_shape[i + 1] * out_stride[i + 1];
}
for (size_t out_id = 0; out_id < out_shape.production(); ++out_id) {
int in_id = 0;
for (int i = expand_times_.size() - 1; i >= 0; --i) {
int in_j = (out_id / out_stride[i]) % in_shape[i];
in_id += in_j * in_stride[i];
}
out_data[out_id] = input_data[in_id];
}
}
void PrepareOpDesc(cpp::OpDesc* op_desc) {
op_desc->SetType("expand_as");
op_desc->SetInput("X", {x_});
op_desc->SetInput("Target", {target_});
op_desc->SetOutput("Out", {out_});
}
void PrepareData() override {
std::vector<float> in_data(dims_.production());
std::vector<float> target_data(target_dims_.production());
for (int i = 0; i < dims_.production(); ++i) {
in_data[i] = i;
}
for (int i = 0; i < target_dims_.production(); ++i) {
target_data[i] = i;
}
SetCommonTensor(x_, dims_, in_data.data());
SetCommonTensor(target_, target_dims_, target_data.data());
}
};
// Runs the precision check for a rank-3 input of shape {C, H, W} expanded to
// {2C, 3H, W}.
void test_expand_as_3dim(Place place, float abs_error) {
  for (const int channels : {3}) {
    for (const int height : {2}) {
      for (const int width : {4}) {
        const DDim x_dims({channels, height, width});
        const DDim target_dims({channels * 2, height * 3, width * 1});
        std::unique_ptr<arena::TestCase> tester(
            new ExpandAsComputeTester(place, "def", x_dims, target_dims));
        arena::Arena arena(std::move(tester), place, abs_error);
        arena.TestPrecision();
      }
    }
  }
}
// Runs the precision check for a rank-4 input of shape {N, C, H, W} expanded
// to {2N, 3C, H, 4W}.
void test_expand_as_4dim(Place place, float abs_error) {
  for (const int batch : {2}) {
    for (const int channels : {3}) {
      for (const int height : {2}) {
        for (const int width : {4}) {
          const DDim x_dims({batch, channels, height, width});
          const DDim target_dims(
              {batch * 2, channels * 3, height * 1, width * 4});
          std::unique_ptr<arena::TestCase> tester(
              new ExpandAsComputeTester(place, "def", x_dims, target_dims));
          arena::Arena arena(std::move(tester), place, abs_error);
          arena.TestPrecision();
        }
      }
    }
  }
}
// Picks the place and tolerance for the current build configuration, then runs
// the rank-3 and rank-4 precision checks. Builds without NPU, ARM or X86
// support return before running anything.
TEST(ExpandAs, precision) {
  Place place;
  float abs_error = 1e-5;
#if defined(LITE_WITH_NPU)
  place = TARGET(kNPU);
  abs_error = 1e-2;  // Using fp16 in NPU
#elif defined(LITE_WITH_ARM) || defined(LITE_WITH_X86)
  // Both ARM and X86 builds run the host kernel.
  place = TARGET(kHost);
#else
  return;
#endif
  test_expand_as_3dim(place, abs_error);
  test_expand_as_4dim(place, abs_error);
}
} // namespace lite
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册