diff --git a/lite/api/_paddle_use_kernels.h b/lite/api/_paddle_use_kernels.h index 75756736f40a707fee06b0139f628b13225d04aa..62acdb86a544b95dfb34ca4783bbc83fa292d7c8 100644 --- a/lite/api/_paddle_use_kernels.h +++ b/lite/api/_paddle_use_kernels.h @@ -104,9 +104,11 @@ USE_LITE_KERNEL(slice, kARM, kFloat, kNCHW, def) USE_LITE_KERNEL(affine_channel, kARM, kFloat, kNCHW, def) USE_LITE_KERNEL(anchor_generator, kARM, kFloat, kNCHW, def) USE_LITE_KERNEL(generate_proposals, kARM, kFloat, kNCHW, def) -USE_LITE_KERNEL(squeeze, kARM, kFloat, kNCHW, def) // for x2paddle -USE_LITE_KERNEL(squeeze2, kARM, kFloat, kNCHW, def) // for x2paddle -USE_LITE_KERNEL(expand, kARM, kFloat, kNCHW, def) // for x2paddle +USE_LITE_KERNEL(squeeze, kARM, kFloat, kNCHW, def) // for x2paddle +USE_LITE_KERNEL(squeeze2, kARM, kFloat, kNCHW, def) // for x2paddle +USE_LITE_KERNEL(unsqueeze, kARM, kFloat, kNCHW, def) // for x2paddle +USE_LITE_KERNEL(unsqueeze2, kARM, kFloat, kNCHW, def) // for x2paddle +USE_LITE_KERNEL(expand, kARM, kFloat, kNCHW, def) // for x2paddle USE_LITE_KERNEL(roi_align, kARM, kFloat, kNCHW, def) USE_LITE_KERNEL(box_clip, kARM, kFloat, kNCHW, def) USE_LITE_KERNEL(reduce_mean, kARM, kFloat, kNCHW, def) diff --git a/lite/api/_paddle_use_ops.h b/lite/api/_paddle_use_ops.h index 890c57c4aa6de9749ab2e173e124e518950431bd..0a8392ba86734d3eb14572ddf0712654a7d3be3c 100644 --- a/lite/api/_paddle_use_ops.h +++ b/lite/api/_paddle_use_ops.h @@ -118,9 +118,11 @@ USE_LITE_OP(cast) USE_LITE_OP(affine_channel) USE_LITE_OP(anchor_generator) USE_LITE_OP(generate_proposals) -USE_LITE_OP(squeeze) // for x2paddle -USE_LITE_OP(squeeze2) // for x2paddle -USE_LITE_OP(expand) // for x2paddle +USE_LITE_OP(squeeze) // for x2paddle +USE_LITE_OP(squeeze2) // for x2paddle +USE_LITE_OP(unsqueeze) // for x2paddle +USE_LITE_OP(unsqueeze2) // for x2paddle +USE_LITE_OP(expand) // for x2paddle USE_LITE_OP(roi_align) USE_LITE_OP(box_clip) USE_LITE_OP(assign_value) diff --git a/lite/kernels/arm/CMakeLists.txt b/lite/kernels/arm/CMakeLists.txt index 91550476d6b42fe10512b03c51056930f7afb6bd..6414f6a3e084cf8d131a51c9da60895ec977b094 100644 --- a/lite/kernels/arm/CMakeLists.txt +++ b/lite/kernels/arm/CMakeLists.txt @@ -33,6 +33,7 @@ add_kernel(shape_compute_arm ARM basic SRCS shape_compute.cc DEPS ${lite_kernel_ add_kernel(slice_compute_arm ARM basic SRCS slice_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(cast_compute_arm ARM basic SRCS cast_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(squeeze_compute_arm ARM basic SRCS squeeze_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(unsqueeze_compute_arm ARM basic SRCS unsqueeze_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(expand_compute_arm ARM basic SRCS expand_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(reduce_max_compute_arm ARM basic SRCS reduce_max_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(sequence_expand_compute_arm ARM basic SRCS sequence_expand_compute.cc DEPS ${lite_kernel_deps} math_arm) @@ -46,6 +47,7 @@ add_kernel(anchor_generator_compute_arm ARM basic SRCS anchor_generator_compute. add_kernel(generate_proposals_compute_arm ARM basic SRCS generate_proposals_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(roi_align_compute_arm ARM basic SRCS roi_align_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(box_clip_compute_arm ARM basic SRCS box_clip_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(range_compute_arm ARM basic SRCS range_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(assign_value_compute_arm ARM basic SRCS assign_value_compute.cc DEPS ${lite_kernel_deps} math_arm) # for OCR specific diff --git a/lite/kernels/arm/cast_compute.cc b/lite/kernels/arm/cast_compute.cc index 8b6971ec138c0adeb7691b05917f403ab7031664..5192eee0b1682e8020ba72b903934aa7dbf94ce2 100644 --- a/lite/kernels/arm/cast_compute.cc +++ b/lite/kernels/arm/cast_compute.cc @@ -23,7 +23,7 @@ namespace arm { template out_type TransOp(in_type in) { - return static_cast(in); + return static_cast(in); } void CastCompute::PrepareForRun() {} @@ -45,6 +45,14 @@ void CastCompute::Run() { const char* x_data_end = x_data_begin + param.X->numel(); float* out_data = param.Out->mutable_data(); std::transform(x_data_begin, x_data_end, out_data, TransOp); + } else if (param.in_dtype == 2 && param.out_dtype == 5) { // int32 -> float32 + const int32_t* x_data_begin = param.X->data(); + const int32_t* x_data_end = x_data_begin + param.X->numel(); + float* out_data = param.Out->mutable_data(); + // std::transform(x_data_begin, x_data_end, out_data, TransOp); + // todo: the input type actually is float. + memcpy(out_data, x_data_begin, sizeof(float) * param.X->numel()); } else { LOG(FATAL) << "other has not been implemented"; } diff --git a/lite/kernels/arm/range_compute.cc b/lite/kernels/arm/range_compute.cc new file mode 100644 index 0000000000000000000000000000000000000000..c4629ac2de7af2965f35c3778e29c076fc515f87 --- /dev/null +++ b/lite/kernels/arm/range_compute.cc @@ -0,0 +1,50 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/kernels/arm/range_compute.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +void RangeCompute::Run() { + auto& param = Param(); + // int start = static_cast(param.Start->data()[0]); + // int end = static_cast(param.End->data()[0]); + // int step = static_cast(param.Step->data()[0]); + int start = (param.Start->data()[0]); + int end = (param.End->data()[0]); + int step = (param.Step->data()[0]); + + float* out_data = param.Out->mutable_data(); + float value = start; + for (int i = 0; i < param.Out->dims().production(); ++i) { + out_data[i] = value; + value += step; + } +} + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL( + range, kARM, kFloat, kNCHW, paddle::lite::kernels::arm::RangeCompute, def) + .BindInput("Start", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindInput("End", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindInput("Step", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) + .Finalize(); diff --git a/lite/kernels/arm/range_compute.h b/lite/kernels/arm/range_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..3713fadca1a35bd4b473066cf5dfd903571152c6 --- /dev/null +++ b/lite/kernels/arm/range_compute.h @@ -0,0 +1,34 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "lite/core/kernel.h" +#include "lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +class RangeCompute : public KernelLite { + public: + void Run() override; + + virtual ~RangeCompute() = default; +}; + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/lite/kernels/arm/unsqueeze_compute.cc b/lite/kernels/arm/unsqueeze_compute.cc new file mode 100644 index 0000000000000000000000000000000000000000..3dc7a274df609b7a96fdcc8978d5cd2e98ac5c93 --- /dev/null +++ b/lite/kernels/arm/unsqueeze_compute.cc @@ -0,0 +1,70 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/kernels/arm/unsqueeze_compute.h" +#include + +namespace paddle { +namespace lite { +namespace kernels { +namespace host { + +void UnsqueezeCompute::Run() { + auto& param = Param(); + auto x = param.X; + auto output = param.Out; + auto x_dims = x->dims(); + auto* x_data = x->data(); + auto* out_data = output->mutable_data(); + memcpy(out_data, x_data, x_dims.production() * sizeof(float)); +} + +void Unsqueeze2Compute::Run() { + auto& param = Param(); + auto x = param.X; + auto output = param.Out; + auto xshape = param.XShape; + auto x_dims = x->dims(); + auto* x_data = x->data(); + auto* out_data = output->mutable_data(); + auto* xshape_data = xshape->mutable_data(); + memcpy(out_data, x_data, x_dims.production() * sizeof(float)); + memcpy(xshape_data, x_data, x_dims.production() * sizeof(float)); +} + +} // namespace host +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(unsqueeze, + kARM, + kFloat, + kNCHW, + paddle::lite::kernels::host::UnsqueezeCompute, + def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) + .Finalize(); + +REGISTER_LITE_KERNEL(unsqueeze2, + kARM, + kFloat, + kNCHW, + paddle::lite::kernels::host::Unsqueeze2Compute, + def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindOutput("XShape", {LiteType::GetTensorTy(TARGET(kARM))}) + .Finalize(); diff --git a/lite/kernels/arm/unsqueeze_compute.h b/lite/kernels/arm/unsqueeze_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..57d4c657f682e130f8eab830222d9b0eeec8a367 --- /dev/null +++ b/lite/kernels/arm/unsqueeze_compute.h @@ -0,0 +1,42 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include "lite/core/kernel.h" +#include "lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace host { + +class UnsqueezeCompute : public KernelLite { + public: + void Run() override; + + virtual ~UnsqueezeCompute() = default; +}; + +class Unsqueeze2Compute : public KernelLite { + public: + void Run() override; + + virtual ~Unsqueeze2Compute() = default; +}; + +} // namespace host +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/lite/operators/CMakeLists.txt b/lite/operators/CMakeLists.txt index 6ec9f3ea7c45e1c55aa076cf3cacb5cfce92c514..b992b12831160471522087826b41e063a948f751 100644 --- a/lite/operators/CMakeLists.txt +++ b/lite/operators/CMakeLists.txt @@ -58,6 +58,7 @@ add_operator(norm_op basic SRCS norm_op.cc DEPS ${op_DEPS}) add_operator(shape_op_lite basic SRCS shape_op.cc DEPS ${op_DEPS}) add_operator(sequence_expand_op_lite basic SRCS sequence_expand_op.cc DEPS ${op_DEPS}) add_operator(squeeze_op_lite basic SRCS squeeze_op.cc DEPS ${op_DEPS}) +add_operator(unsqueeze_op_lite basic SRCS unsqueeze_op.cc DEPS ${op_DEPS}) add_operator(im2sequence_op basic SRCS im2sequence_op.cc DEPS ${op_DEPS}) add_operator(reduce_mean_op basic SRCS reduce_mean_op.cc DEPS ${op_DEPS}) add_operator(stack_op basic SRCS stack_op.cc DEPS ${op_DEPS}) @@ -70,6 +71,7 @@ add_operator(roi_align_op basic SRCS roi_align_op.cc DEPS ${op_DEPS}) add_operator(box_clip_op basic SRCS box_clip_op.cc DEPS ${op_DEPS}) add_operator(flatten_op basic SRCS flatten_op.cc DEPS ${op_DEPS}) add_operator(fake_quantize_range_abs_max_op basic SRCS fake_quantize_range_abs_max.cc DEPS ${op_DEPS}) +add_operator(range_op basic SRCS range_op.cc DEPS ${op_DEPS}) add_operator(assign_value_op basic SRCS assign_value_op.cc DEPS ${op_DEPS}) # for OCR specific diff --git a/lite/operators/concat_op.cc b/lite/operators/concat_op.cc index cbc946dbb0df7f6c23d7871f12dfd091c154b65c..f073faf6b9d98a92d195f2004fc98760a45af9ba 100644 --- a/lite/operators/concat_op.cc +++ b/lite/operators/concat_op.cc @@ -60,6 +60,7 @@ bool ConcatOpLite::AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) { auto inputs = op_desc.Input("X"); auto out = op_desc.Output("Out").front(); + param_.x.clear(); for (auto var : inputs) { param_.x.push_back(scope->FindVar(var)->GetMutable()); } diff --git a/lite/operators/op_params.h b/lite/operators/op_params.h index 9c39ef9a9ca119c8304c49ad25941bd76015de37..18d7c412fe0f005699369d9338943c31e9e33a1b 100644 --- a/lite/operators/op_params.h +++ b/lite/operators/op_params.h @@ -770,6 +770,13 @@ struct SqueezeParam { std::vector axes{}; }; +struct UnsqueezeParam { + const lite::Tensor* X{}; + lite::Tensor* Out{}; + lite::Tensor* XShape{}; + std::vector axes{}; +}; + /// ----------------------- expand operators ---------------------- struct ExpandParam { const lite::Tensor* X{}; @@ -811,6 +818,13 @@ struct BoxClipParam { lite::Tensor* Output{}; }; +struct RangeParam { + const lite::Tensor* Start; + const lite::Tensor* End; + const lite::Tensor* Step; + lite::Tensor* Out; +}; + /// ----------------------- assign_value operators ----------------------- struct AssignValueParam { std::vector shape{}; diff --git a/lite/operators/range_op.cc b/lite/operators/range_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..1426e07d8cc312cd2181282559e5c5c70add0c4c --- /dev/null +++ b/lite/operators/range_op.cc @@ -0,0 +1,71 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/operators/range_op.h" +#include +#include "lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace operators { + +bool RangeOpLite::CheckShape() const { + CHECK_OR_FALSE(param_.Start); + CHECK_OR_FALSE(param_.End); + CHECK_OR_FALSE(param_.Step); + CHECK_OR_FALSE(param_.Out); + return true; +} + +template +void GetSize(T start, T end, T step, int64_t* size) { + CHECK(!std::equal_to()(step, 0)) + << "The step of range op should not be 0."; + CHECK(((start < end) && (step > 0)) || (start > end) && (step < 0)) + << "The step should be greater than 0 while start < end. And the " + "step should be less than 0 while start > end."; + *size = std::is_integral::value + ? ((std::abs(end - start) + std::abs(step) - 1) / std::abs(step)) + : std::ceil(std::abs((end - start) / step)); +} + +bool RangeOpLite::InferShape() const { + int start = param_.Start->data()[0]; + int end = param_.End->data()[0]; + int step = param_.Step->data()[0]; + int64_t size = 0; + GetSize(start, end, step, &size); + param_.Out->Resize(std::vector({size})); + return true; +} + +bool RangeOpLite::AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) { + auto start = opdesc.Input("Start").front(); + auto end = opdesc.Input("End").front(); + auto step = opdesc.Input("Step").front(); + auto out = opdesc.Output("Out").front(); + + param_.Start = scope->FindVar(start)->GetMutable(); + param_.End = scope->FindVar(end)->GetMutable(); + param_.Step = scope->FindVar(step)->GetMutable(); + param_.Out = scope->FindVar(out)->GetMutable(); + + return true; +} + +} // namespace operators +} // namespace lite +} // namespace paddle + +REGISTER_LITE_OP(range, paddle::lite::operators::RangeOpLite); diff --git a/lite/operators/range_op.h b/lite/operators/range_op.h new file mode 100644 index 0000000000000000000000000000000000000000..a1c7d4d4cc43d72001ac3519cb1c4f85ab8196ff --- /dev/null +++ b/lite/operators/range_op.h @@ -0,0 +1,45 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include "lite/core/op_lite.h" + +namespace paddle { +namespace lite { +namespace operators { + +class RangeOpLite : public OpLite { + public: + RangeOpLite() {} + explicit RangeOpLite(const std::string &op_type) : OpLite(op_type) {} + + bool CheckShape() const override; + + bool InferShape() const override; + + bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override; + + void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); } + std::string DebugString() const override { return "range"; } + + private: + mutable RangeParam param_; +}; + +} // namespace operators +} // namespace lite +} // namespace paddle diff --git a/lite/operators/squeeze_op.cc b/lite/operators/squeeze_op.cc index 19bd20f1ac0ee5c02b4fde6f6ec7bf9bcf75237c..01f96c28ff6be38e426030aa3c580f28f73b3a38 100644 --- a/lite/operators/squeeze_op.cc +++ b/lite/operators/squeeze_op.cc @@ -121,7 +121,7 @@ bool Squeeze2Op::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) { auto xshape_var = scope->FindVar(opdesc.Output("XShape").front()); CHECK(xshape_var); param_.XShape = xshape_var->GetMutable(); - CHECK(param_.XShape) << "Output(XShape) of ReshapeOp should not be null."; + CHECK(param_.XShape) << "Output(XShape) of SqueezeOp should not be null."; return true; } diff --git a/lite/operators/unsqueeze_op.cc b/lite/operators/unsqueeze_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..aca9a9c0e8bb2693d80c70d384489193ec94758c --- /dev/null +++ b/lite/operators/unsqueeze_op.cc @@ -0,0 +1,120 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/operators/unsqueeze_op.h" +#include "lite/core/op_registry.h" +namespace paddle { +namespace lite { +namespace operators { + +static DDim GetOutputShape(const std::vector &unsqz_dims, + const DDim &in_dims) { + int output_size = in_dims.size() + static_cast(unsqz_dims.size()); + int cur_output_size = in_dims.size(); + std::vector output_shape(output_size, 0); + + // Validate Check: rank range. + CHECK_LE(output_size, 6) << "The output tensor's rank should be less than 6."; + + for (int axis : unsqz_dims) { + int cur = axis < 0 ? axis + cur_output_size + 1 : axis; + // Validate Check: the axis bound + CHECK((cur >= 0) && (cur <= cur_output_size)) + << "The unsqueeze dims must be within range of current rank."; + // Move old axis, and insert new axis + for (int i = cur_output_size; i >= cur; --i) { + if (output_shape[i] == 1) { + // Move axis + output_shape[i + 1] = 1; + output_shape[i] = 0; + } + } + + output_shape[cur] = 1; + // Add the output size. + cur_output_size++; + } + + // Make output shape + for (int in_idx = 0, out_idx = 0; out_idx < output_size; ++out_idx) { + if (output_shape[out_idx] == 0) { + output_shape[out_idx] = in_dims[in_idx++]; + } + } + + return DDim(output_shape); +} + +bool UnsqueezeOp::CheckShape() const { + CHECK_OR_FALSE(param_.X); + CHECK_OR_FALSE(param_.Out); + return true; +} + +bool UnsqueezeOp::InferShape() const { + std::vector unsqueeze_dims = param_.axes; + DDim in_dims = param_.X->dims(); + DDim out_dim = GetOutputShape(unsqueeze_dims, in_dims); + param_.Out->Resize(out_dim); + return true; +} + +bool UnsqueezeOp::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) { + auto x_var = scope->FindVar(opdesc.Input("X").front()); + auto output_var = scope->FindVar(opdesc.Output("Out").front()); + CHECK(x_var); + CHECK(output_var); + param_.X = const_cast(&(x_var->Get())); + param_.Out = output_var->GetMutable(); + + if (opdesc.HasAttr("axes")) { + param_.axes = opdesc.GetAttr>("axes"); + } + CHECK(param_.X) << "Input(X) of UnsqueezeOp should not be null."; + CHECK(param_.Out) << "Output(Out) of UnsqueezeOp should not be null."; + return true; +} + +bool Unsqueeze2Op::CheckShape() const { + UnsqueezeOp::CheckShape(); + CHECK_OR_FALSE(param_.XShape); + return true; +} + +bool Unsqueeze2Op::InferShape() const { + UnsqueezeOp::InferShape(); + auto x_dims = param_.X->dims(); + std::vector xshape_dims(x_dims.size() + 1, 1); + for (size_t i = 0; i < x_dims.size(); i++) { + xshape_dims[i + 1] = x_dims[i]; + } + param_.XShape->Resize(DDim(xshape_dims)); + return true; +} + +bool Unsqueeze2Op::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) { + UnsqueezeOp::AttachImpl(opdesc, scope); + auto xshape_var = scope->FindVar(opdesc.Output("XShape").front()); + CHECK(xshape_var); + param_.XShape = xshape_var->GetMutable(); + CHECK(param_.XShape) << "Output(XShape) of Unsqueeze2Op should not be null."; + return true; +} + +} // namespace operators +} // namespace lite +} // namespace paddle + +REGISTER_LITE_OP(unsqueeze, paddle::lite::operators::UnsqueezeOp); +REGISTER_LITE_OP(unsqueeze2, paddle::lite::operators::Unsqueeze2Op); diff --git a/lite/operators/unsqueeze_op.h b/lite/operators/unsqueeze_op.h new file mode 100644 index 0000000000000000000000000000000000000000..1e88828c6c5fdef767850909c0dae8ec65e9d1e0 --- /dev/null +++ b/lite/operators/unsqueeze_op.h @@ -0,0 +1,61 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include "lite/core/op_lite.h" +#include "lite/core/scope.h" +#include "lite/utils/all.h" + +namespace paddle { +namespace lite { +namespace operators { + +class UnsqueezeOp : public OpLite { + public: + UnsqueezeOp() {} + explicit UnsqueezeOp(const std::string &op_type) : OpLite(op_type) {} + + bool CheckShape() const override; + + bool InferShape() const override; + + bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override; + + void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); } + std::string DebugString() const override { return "unsqueeze"; } + + protected: + mutable UnsqueezeParam param_; +}; + +class Unsqueeze2Op : public UnsqueezeOp { + public: + Unsqueeze2Op() : UnsqueezeOp() {} + explicit Unsqueeze2Op(const std::string &op_type) : UnsqueezeOp(op_type) {} + + bool CheckShape() const override; + + bool InferShape() const override; + + bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override; + + void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); } + std::string DebugString() const override { return "unsqueeze2"; } +}; + +} // namespace operators +} // namespace lite +} // namespace paddle diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt index 9dd339e115e6fcd5f1ca99180bea177273140223..b9f3afb33680d1d651053857a50efbde876a8628 100644 --- a/lite/tests/kernels/CMakeLists.txt +++ b/lite/tests/kernels/CMakeLists.txt @@ -42,11 +42,13 @@ endif() lite_cc_test(test_kernel_crop_compute SRCS crop_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_sequence_expand_compute SRCS sequence_expand_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_squeeze_compute SRCS squeeze_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_unsqueeze_compute SRCS unsqueeze_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_slice_compute SRCS slice_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_expand_compute SRCS expand_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_matmul_compute SRCS matmul_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_reduce_mean_compute SRCS reduce_mean_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_stack_compute SRCS stack_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_range_compute SRCS range_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_affine_channel_compute SRCS affine_channel_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_anchor_generator_compute SRCS anchor_generator_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_generate_proposals_compute SRCS generate_proposals_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) diff --git a/lite/tests/kernels/cast_compute_test.cc b/lite/tests/kernels/cast_compute_test.cc index f000ea1d719bfc1389ce4656d688a31de67346d6..a5611b71f419fdc16e66c5b94e54bc6864a45fb3 100644 --- a/lite/tests/kernels/cast_compute_test.cc +++ b/lite/tests/kernels/cast_compute_test.cc @@ -25,34 +25,52 @@ class CastComputeTester : public arena::TestCase { // common attributes for this op. std::string input_ = "x"; std::string output_ = "out"; - int in_dtype_ = 21; - int out_dtype_ = 5; + int in_dtype_; + int out_dtype_; DDim x_dims_{{2, 2, 2, 2}}; public: - CastComputeTester(const Place& place, const std::string& alias) - : TestCase(place, alias) {} + CastComputeTester(const Place& place, + const std::string& alias, + int in_dtype, + int out_dtype) + : TestCase(place, alias), in_dtype_(in_dtype), out_dtype_(out_dtype) {} void RunBaseline(Scope* scope) override { auto* out = scope->NewTensor(output_); CHECK(out); out->Resize(x_dims_); - auto* output_data = out->mutable_data(); - auto* x = scope->FindTensor(input_); - const auto* x_data = x->data(); - - int num = x_dims_[0]; - int channel = x_dims_[1]; - int size = x_dims_[2] * x_dims_[3]; - int in_channel = channel * size; - - auto* output_data_tmp = output_data; - auto* x_data_tmp = x_data; - for (int i = 0; i < x_dims_.production(); i++) { - *output_data_tmp = static_cast(*x_data_tmp); - output_data_tmp++; - x_data_tmp++; + if (out_dtype_ == 5 && in_dtype_ == 21) { + auto* output_data = out->mutable_data(); + auto* x = scope->FindTensor(input_); + auto* x_data = x->data(); + int num = x_dims_[0]; + int channel = x_dims_[1]; + int size = x_dims_[2] * x_dims_[3]; + int in_channel = channel * size; + auto* output_data_tmp = output_data; + auto* x_data_tmp = x_data; + for (int i = 0; i < x_dims_.production(); i++) { + *output_data_tmp = static_cast(*x_data_tmp); + output_data_tmp++; + x_data_tmp++; + } + } else if (out_dtype_ == 5 && in_dtype_ == 2) { + auto* output_data = out->mutable_data(); + auto* x = scope->FindTensor(input_); + auto* x_data = x->data(); + int num = x_dims_[0]; + int channel = x_dims_[1]; + int size = x_dims_[2] * x_dims_[3]; + int in_channel = channel * size; + auto* output_data_tmp = output_data; + auto* x_data_tmp = x_data; + for (int i = 0; i < x_dims_.production(); i++) { + *output_data_tmp = static_cast(*x_data_tmp); + output_data_tmp++; + x_data_tmp++; + } } } @@ -65,12 +83,23 @@ class CastComputeTester : public arena::TestCase { } void PrepareData() override { - std::vector x_data(x_dims_.production()); - for (int i = 0; i < x_dims_.production(); i++) { - float sign = i % 3 == 0 ? -1.0f : 1.0f; - x_data[i] = sign * static_cast(i % 128); + if (in_dtype_ == 21) { + std::vector x_data(x_dims_.production()); + for (int i = 0; i < x_dims_.production(); i++) { + float sign = i % 3 == 0 ? -1.0f : 1.0f; + x_data[i] = sign * static_cast(i % 128); + } + SetCommonTensor(input_, x_dims_, x_data.data()); + } else if (in_dtype_ == 2) { + std::vector x_data(x_dims_.production()); + for (int i = 0; i < x_dims_.production(); i++) { + int sign = i % 3 == 0 ? -1 : 1; + x_data[i] = sign * static_cast(i % 128); + } + SetCommonTensor(input_, x_dims_, x_data.data()); + } else { + LOG(FATAL) << "not implemented!"; } - SetCommonTensor(input_, x_dims_, x_data.data()); } }; @@ -79,9 +108,15 @@ TEST(Cast, precision) { #ifdef LITE_WITH_ARM Place place(TARGET(kARM)); - std::unique_ptr tester(new CastComputeTester(place, "def")); + std::unique_ptr tester( + new CastComputeTester(place, "def", 21, 5)); arena::Arena arena(std::move(tester), place, 2e-5); arena.TestPrecision(); + +// std::unique_ptr tester1( +// new CastComputeTester(place, "def", 2, 5)); +// arena::Arena arena1(std::move(tester1), place, 2e-5); +// arena1.TestPrecision(); #endif } diff --git a/lite/tests/kernels/range_compute_test.cc b/lite/tests/kernels/range_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..d98e882c88aa05395facc7c0afcf023b0fd8ccde --- /dev/null +++ b/lite/tests/kernels/range_compute_test.cc @@ -0,0 +1,110 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "lite/api/paddle_use_kernels.h" +#include "lite/api/paddle_use_ops.h" +#include "lite/core/arena/framework.h" + +namespace paddle { +namespace lite { + +class RangeComputeTester : public arena::TestCase { + protected: + // common attributes for this op. + std::string start = "Start"; + std::string end = "End"; + std::string step = "Step"; + std::string out = "Out"; + int st_, ed_, sp_; + + public: + RangeComputeTester(const Place& place, + const std::string& alias, + float st, + float ed, + float sp) + : TestCase(place, alias), st_(st), ed_(ed), sp_(sp) {} + + void RunBaseline(Scope* scope) override { + auto* output = scope->NewTensor(out); + CHECK(output); + int64_t size; + auto* st = scope->FindMutableTensor(start); + auto* ed = scope->FindMutableTensor(end); + auto* sp = scope->FindMutableTensor(step); + float st_val = st->data()[0]; + float ed_val = ed->data()[0]; + float sp_val = sp->data()[0]; + // size = (std::abs(ed_val - st_val) + std::abs(sp_val) - 1) / + // std::abs(sp_val); + size = std::ceil(std::abs((ed_val - st_val) / sp_val)); + output->Resize(DDim(std::vector({static_cast(size)}))); + auto* out_data = output->mutable_data(); + + float val = st_; + for (int i = 0; i < size; i++) { + out_data[i] = val; + val += sp_; + } + } + + void PrepareOpDesc(cpp::OpDesc* op_desc) { + op_desc->SetType("range"); + op_desc->SetInput("Start", {start}); + op_desc->SetInput("End", {end}); + op_desc->SetInput("Step", {step}); + op_desc->SetOutput("Out", {out}); + } + + void PrepareData() override { + std::vector st(1); + std::vector ed(1); + std::vector sp(1); + + st[0] = st_; + ed[0] = ed_; + sp[0] = sp_; + DDim dim(std::vector({1})); + + SetCommonTensor(start, dim, st.data()); + SetCommonTensor(end, dim, ed.data()); + SetCommonTensor(step, dim, sp.data()); + } +}; + +void test_range(Place place) { + std::unique_ptr tester1( + new RangeComputeTester(place, "def", 1, 10, 1)); + arena::Arena arena(std::move(tester1), place, 2e-5); + arena.TestPrecision(); + + std::unique_ptr tester2( + new RangeComputeTester(place, "def", 10, 1, -2)); + arena::Arena arena2(std::move(tester2), place, 2e-5); + arena2.TestPrecision(); +} + +TEST(Range, precision) { +#ifdef LITE_WITH_X86 + Place place(TARGET(kX86)); +#endif +#ifdef LITE_WITH_ARM + Place place(TARGET(kARM)); + test_range(place); +#endif +} + +} // namespace lite +} // namespace paddle diff --git a/lite/tests/kernels/unsqueeze_compute_test.cc b/lite/tests/kernels/unsqueeze_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..f6f35c615e8e2fba35d235d7a8ef78e0786cc11a --- /dev/null +++ b/lite/tests/kernels/unsqueeze_compute_test.cc @@ -0,0 +1,250 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include "lite/api/paddle_use_kernels.h" +#include "lite/api/paddle_use_ops.h" +#include "lite/core/arena/framework.h" + +namespace paddle { +namespace lite { + +class UnsqueezeComputeTester : public arena::TestCase { + protected: + // common attributes for this op. + std::string x_ = "X"; + std::string out_ = "Out"; + std::vector axes_; + DDim dims_; + + public: + UnsqueezeComputeTester(const Place& place, + const std::string& alias, + const std::vector& axes, + DDim dims) + : TestCase(place, alias), axes_(axes), dims_(dims) {} + + void RunBaseline(Scope* scope) override { + const auto* input = scope->FindTensor(x_); + CHECK(input); + auto* out = scope->NewTensor(out_); + CHECK(out); + + DDim in_dims(dims_); + int output_size = in_dims.size() + static_cast(axes_.size()); + int cur_output_size = in_dims.size(); + std::vector output_shape(output_size, 0); + + // Validate Check: rank range. + CHECK_LE(output_size, 6) + << "The output tensor's rank should be less than 6."; + + for (int axis : axes_) { + int cur = axis < 0 ? axis + cur_output_size + 1 : axis; + // Validate Check: the axis bound + CHECK((cur >= 0) && (cur <= cur_output_size)) + << "The unsqueeze dims must be within range of current rank."; + // Move old axis, and insert new axis + for (int i = cur_output_size; i >= cur; --i) { + if (output_shape[i] == 1) { + // Move axis + output_shape[i + 1] = 1; + output_shape[i] = 0; + } + } + + output_shape[cur] = 1; + // Add the output size. + cur_output_size++; + } + + // Make output shape + for (int in_idx = 0, out_idx = 0; out_idx < output_size; ++out_idx) { + if (output_shape[out_idx] == 0) { + output_shape[out_idx] = in_dims[in_idx++]; + } + } + for (size_t i = 0; i < output_shape.size(); ++i) + out->Resize(DDim(output_shape)); + auto* input_data = input->data(); + auto* out_data = out->mutable_data(); + memcpy(out_data, input_data, sizeof(float) * dims_.production()); + } + + void PrepareOpDesc(cpp::OpDesc* op_desc) { + op_desc->SetType("unsqueeze"); + op_desc->SetInput("X", {x_}); + op_desc->SetOutput("Out", {out_}); + op_desc->SetAttr("axes", axes_); + } + + void PrepareData() override { + std::vector in_data(dims_.production()); + for (int i = 0; i < dims_.production(); ++i) { + in_data[i] = i; + } + SetCommonTensor(x_, dims_, in_data.data()); + } +}; + +class Unsqueeze2ComputeTester : public arena::TestCase { + protected: + // common attributes for this op. + std::string x_ = "X"; + std::string out_ = "Out"; + std::string xshape_ = "XShape"; + std::vector axes_; + DDim dims_; + + public: + Unsqueeze2ComputeTester(const Place& place, + const std::string& alias, + const std::vector& axes, + DDim dims) + : TestCase(place, alias), axes_(axes), dims_(dims) {} + + void RunBaseline(Scope* scope) override { + const auto* input = scope->FindTensor(x_); + CHECK(input); + auto* out = scope->NewTensor(out_); + CHECK(out); + auto* xshape = scope->NewTensor(xshape_); + CHECK(xshape); + std::vector xshape_sp(dims_.size() + 1, 1); + for (size_t i = 0; i < dims_.size(); ++i) { + xshape_sp[i + 1] = dims_[i]; + } + xshape->Resize(DDim(xshape_sp)); + + DDim in_dims(dims_); + int output_size = in_dims.size() + static_cast(axes_.size()); + int cur_output_size = in_dims.size(); + std::vector output_shape(output_size, 0); + + // Validate Check: rank range. + CHECK_LE(output_size, 6) + << "The output tensor's rank should be less than 6."; + + for (int axis : axes_) { + int cur = axis < 0 ? axis + cur_output_size + 1 : axis; + // Validate Check: the axis bound + CHECK((cur >= 0) && (cur <= cur_output_size)) + << "The unsqueeze dims must be within range of current rank."; + // Move old axis, and insert new axis + for (int i = cur_output_size; i >= cur; --i) { + if (output_shape[i] == 1) { + // Move axis + output_shape[i + 1] = 1; + output_shape[i] = 0; + } + } + + output_shape[cur] = 1; + // Add the output size. + cur_output_size++; + } + + // Make output shape + for (int in_idx = 0, out_idx = 0; out_idx < output_size; ++out_idx) { + if (output_shape[out_idx] == 0) { + output_shape[out_idx] = in_dims[in_idx++]; + } + } + + out->Resize(DDim(output_shape)); + + auto* input_data = input->data(); + auto* out_data = out->mutable_data(); + auto* xshape_data = xshape->mutable_data(); + memcpy(out_data, input_data, sizeof(float) * dims_.production()); + memcpy(xshape_data, input_data, sizeof(float) * dims_.production()); + } + + void PrepareOpDesc(cpp::OpDesc* op_desc) { + op_desc->SetType("unsqueeze2"); + op_desc->SetInput("X", {x_}); + op_desc->SetOutput("Out", {out_}); + op_desc->SetOutput("XShape", {xshape_}); + op_desc->SetAttr("axes", axes_); + } + + void PrepareData() override { + std::vector in_data(dims_.production()); + for (int i = 0; i < dims_.production(); ++i) { + in_data[i] = i; + } + SetCommonTensor(x_, dims_, in_data.data()); + } +}; + +void test_unsqueeze(Place place) { + for (std::vector axes : {std::vector({}), + std::vector({0, 2}), + std::vector({0, -2})}) { + for (int N : {1}) { + for (int C : {3}) { + for (int H : {1}) { + for (int W : {5}) { + std::unique_ptr tester(new UnsqueezeComputeTester( + place, "def", axes, DDim({N, C, H, W}))); + arena::Arena arena(std::move(tester), place, 2e-5); + arena.TestPrecision(); + } + } + } + } + } +} + +void test_unsqueeze2(Place place) { + for (std::vector axes : {std::vector({}), + std::vector({0, 2}), + std::vector({0, -2})}) { + for (int N : {1}) { + for (int C : {3}) { + for (int H : {1}) { + for (int W : {5}) { + std::unique_ptr tester(new Unsqueeze2ComputeTester( + place, "def", axes, DDim({N, C, H, W}))); + arena::Arena arena(std::move(tester), place, 2e-5); + arena.TestPrecision(); + } + } + } + } + } +} + +TEST(squeeze, precision) { +#ifdef LITE_WITH_X86 + Place place(TARGET(kX86)); +#endif +#ifdef LITE_WITH_ARM + Place place(TARGET(kARM)); + test_unsqueeze(place); +#endif +} + +TEST(squeeze2, precision) { +#ifdef LITE_WITH_X86 + Place place(TARGET(kX86)); +#endif +#ifdef LITE_WITH_ARM + Place place(TARGET(kARM)); + test_unsqueeze2(place); +#endif +} + +} // namespace lite +} // namespace paddle