add split op and arm kernel

test=develop

add split op and arm kernel
test=develop
a477fdc6 · zhupengyang · Tensor Tang · 32aceef1 · a477fdc6 · a477fdc6
11 changed file
--- a/paddle/fluid/lite/arm/math/CMakeLists.txt
+++ b/paddle/fluid/lite/arm/math/CMakeLists.txt
@@ -31,5 +31,6 @@ cc_library(math_arm SRCS
    conv_gemmlike.cc
    conv_winograd_3x3.cc
    conv_winograd.cc
+    split.cc
    DEPS ${lite_kernel_deps} eigen3)
--- a/paddle/fluid/lite/arm/math/split.cc
+++ b/paddle/fluid/lite/arm/math/split.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/fluid/lite/arm/math/split.h"
+#include <algorithm>
+#include "paddle/fluid/lite/arm/math/funcs.h"
+namespace paddle {
+namespace lite {
+namespace arm {
+namespace math {
+template <>
+void split_cpy<float>(const float* din, float* dout, int num) {
+  int cnt = num >> 4;
+  int remain = num % 16;
+#pragma omp parallel for
+  for (int i = 0; i < cnt; i++) {
+    const float* din_ptr = din + (i << 4);
+    float* dout_ptr = dout + (i << 4);
+    float32x4_t din0 = vld1q_f32(din_ptr);
+    float32x4_t din1 = vld1q_f32(din_ptr + 4);
+    float32x4_t din2 = vld1q_f32(din_ptr + 8);
+    float32x4_t din3 = vld1q_f32(din_ptr + 12);
+    vst1q_f32(dout_ptr, din0);
+    vst1q_f32(dout_ptr + 4, din1);
+    vst1q_f32(dout_ptr + 8, din2);
+    vst1q_f32(dout_ptr + 12, din3);
+  }
+  if (remain > 0) {
+    const float* din_ptr = din + (cnt << 4);
+    float* dout_ptr = dout + (cnt << 4);
+    for (int i = 0; i < remain; i++) {
+      *dout_ptr = *din_ptr;
+      dout_ptr++;
+      din_ptr++;
+    }
+  }
+}
+template <>
+void split<float>(const float* din, std::vector<lite::Tensor*>* dout,
+                  const int axis, const std::vector<int>& in_strides) {
+  int input_offset = 0;
+  for (auto out : *dout) {
+    auto out_dim = out->dims();
+    std::vector<int> out_strides(out_dim.size());
+    out_strides[out_dim.size() - 1] = out_dim[out_dim.size() - 1];
+    for (int i = out_dim.size() - 2; i >= 0; --i) {
+      out_strides[i] = out_strides[i + 1] * out_dim[i];
+    }
+    float* out_data = out->mutable_data<float>();
+    int before = out_strides[0] / out_strides[axis];
+    int in_after = in_strides[axis];
+    int out_after = out_strides[axis];
+    for (int i = 0; i < before; ++i) {
+      split_cpy(din + input_offset + i * in_after, out_data + i * out_after,
+                out_after);
+    }
+    input_offset += out_strides[axis];
+  }
+}
+}  // namespace math
+}  // namespace arm
+}  // namespace lite
+}  // namespace paddle
--- a/paddle/fluid/lite/arm/math/split.h
+++ b/paddle/fluid/lite/arm/math/split.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include <vector>
+#include "paddle/fluid/lite/core/op_lite.h"
+namespace paddle {
+namespace lite {
+namespace arm {
+namespace math {
+template <typename T>
+void split_cpy(const T* din, T* dout, int num);
+template <typename T>
+void split(const T* din, std::vector<lite::Tensor*>* dout, const int axis,
+           const std::vector<int>& in_strides);
+}  // namespace math
+}  // namespace arm
+}  // namespace lite
+}  // namespace paddle
--- a/paddle/fluid/lite/kernels/arm/CMakeLists.txt
+++ b/paddle/fluid/lite/kernels/arm/CMakeLists.txt
@@ -12,6 +12,7 @@ cc_library(softmax_compute_arm SRCS softmax_compute.cc DEPS ${lite_kernel_deps}
 cc_library(conv_compute_arm SRCS conv_compute.cc DEPS ${lite_kernel_deps} math_arm)
 cc_library(elementwise_add_compute_arm SRCS elementwise_add_compute.cc DEPS ${lite_kernel_deps} math_arm)
 cc_library(pool_compute_arm SRCS pool_compute.cc DEPS ${lite_kernel_deps} math_arm)
+cc_library(split_compute_arm SRCS split_compute.cc DEPS ${lite_kernel_deps} math_arm)
 lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm math_arm)
 lite_cc_test(test_scale_compute_arm SRCS scale_compute_test.cc DEPS scale_compute_arm)
@@ -19,6 +20,7 @@ lite_cc_test(test_softmax_compute_arm SRCS softmax_compute_test.cc DEPS softmax_
 lite_cc_test(test_conv_compute_arm SRCS conv_compute_test.cc DEPS conv_compute_arm)
 lite_cc_test(test_elementwise_add_compute_arm SRCS elementwise_add_compute_test.cc DEPS elementwise_add_compute_arm)
 lite_cc_test(test_pool_compute_arm SRCS pool_compute_test.cc DEPS pool_compute_arm)
+lite_cc_test(test_split_compute_arm SRCS split_compute_test.cc DEPS split_compute_arm)
 set(arm_kernels
    fc_compute_arm
@@ -29,6 +31,7 @@ set(arm_kernels
    conv_compute_arm
    elementwise_add_compute_arm
    pool_compute_arm
+    split_compute_arm
    )
 set(arm_kernels "${arm_kernels}" CACHE INTERNAL "arm kernels")

--- a/paddle/fluid/lite/kernels/arm/split_compute.cc
+++ b/paddle/fluid/lite/kernels/arm/split_compute.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/fluid/lite/kernels/arm/split_compute.h"
+#include <vector>
+#include "paddle/fluid/lite/arm/math/funcs.h"
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace arm {
+void SplitCompute::Run() {
+  auto& param = Param<operators::SplitParam>();
+  const float* din = param.x->data<float>();
+  auto* dout = param.output;
+  auto in_dim = param.x->dims();
+  std::vector<int> in_strides(in_dim.size());
+  in_strides[in_dim.size() - 1] = in_dim[in_dim.size() - 1];
+  for (int i = in_dim.size() - 2; i >= 0; --i) {
+    in_strides[i] = in_strides[i + 1] * in_dim[i];
+  }
+  lite::arm::math::split(din, dout, param.axis, in_strides);
+}
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+REGISTER_LITE_KERNEL(split, kARM, kFloat, kNCHW,
+                     paddle::lite::kernels::arm::SplitCompute, def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
+    .Finalize();
--- a/paddle/fluid/lite/kernels/arm/split_compute.h
+++ b/paddle/fluid/lite/kernels/arm/split_compute.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include <algorithm>
+#include "paddle/fluid/lite/core/kernel.h"
+#include "paddle/fluid/lite/core/op_registry.h"
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace arm {
+class SplitCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
+ public:
+  void Run() override;
+  virtual ~SplitCompute() = default;
+};
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
--- a/paddle/fluid/lite/kernels/arm/split_compute_test.cc
+++ b/paddle/fluid/lite/kernels/arm/split_compute_test.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/fluid/lite/kernels/arm/split_compute.h"
+#include <gtest/gtest.h>
+#include <limits>
+#include <vector>
+#include "paddle/fluid/lite/core/op_registry.h"
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace arm {
+void splite_resize_out(const lite::Tensor* din,
+                       std::vector<lite::Tensor*>* dout, int axis, int num,
+                       const std::vector<int>& sections) {
+  for (auto out : *dout) delete out;
+  dout->clear();
+  auto in_dims = din->dims();
+  int outs_number;
+  if (num > 0) {
+    outs_number = num;
+  } else {
+    outs_number = sections.size();
+  }
+  for (int i = 0; i < outs_number; i++) {
+    dout->push_back(new lite::Tensor);
+  }
+  std::vector<lite::DDimLite> outs_dims;
+  outs_dims.reserve(outs_number);
+  if (num > 0) {
+    int out_axis_dim = in_dims[axis] / num;
+    for (int i = 0; i < outs_number; ++i) {
+      auto dim = in_dims;
+      dim[axis] = out_axis_dim;
+      outs_dims.push_back(dim);
+    }
+  } else if (sections.size() > 0) {
+    for (size_t i = 0; i < outs_number; ++i) {
+      auto dim = in_dims;
+      dim[axis] = sections[i];
+      outs_dims.push_back(dim);
+    }
+  }
+  for (int j = 0; j < outs_dims.size(); ++j) {
+    (*dout)[j]->Resize(outs_dims[j]);
+  }
+}
+template <typename dtype>
+void split_compute_ref(const operators::SplitParam& param) {
+  const dtype* din = param.x->mutable_data<const dtype>();
+  auto& dout = param.output;
+  auto in_dim = param.x->dims();
+  int axis = param.axis;
+  std::vector<int> in_strides(in_dim.size());
+  in_strides[in_dim.size() - 1] = in_dim[in_dim.size() - 1];
+  for (int i = in_dim.size() - 2; i >= 0; --i) {
+    in_strides[i] = in_strides[i + 1] * in_dim[i];
+  }
+  int input_offset = 0;
+  for (auto out : *dout) {
+    auto out_dim = out->dims();
+    std::vector<int> out_strides(out_dim.size());
+    out_strides[out_dim.size() - 1] = out_dim[out_dim.size() - 1];
+    for (int i = out_dim.size() - 2; i >= 0; --i) {
+      out_strides[i] = out_strides[i + 1] * out_dim[i];
+    }
+    dtype* out_data = out->mutable_data<dtype>();
+    int before = out_strides[0] / out_strides[axis];
+    int in_after = in_strides[axis];
+    int out_after = out_strides[axis];
+    for (int i = 0; i < before; ++i) {
+      std::memcpy(out_data + i * out_after, din + input_offset + i * in_after,
+                  sizeof(dtype) * out_after);
+    }
+    input_offset += out_strides[axis];
+  }
+}
+TEST(split_arm, init) {
+  SplitCompute split;
+  ASSERT_EQ(split.precision(), PRECISION(kFloat));
+  ASSERT_EQ(split.target(), TARGET(kARM));
+}
+TEST(split_arm, compute) {
+  SplitCompute split;
+  operators::SplitParam param;
+  lite::Tensor x;
+  std::vector<lite::Tensor*> output;
+  std::vector<lite::Tensor*> output_ref;
+  for (auto n : {1, 3, 4}) {
+    for (auto c : {1, 3, 4}) {
+      for (auto h : {1, 3, 4}) {
+        for (auto w : {1, 3, 4}) {
+          for (auto axis : {0, 1, 2, 3}) {
+            for (auto num : {0, 1, 2, 3}) {
+              for (auto sections :
+                   {std::vector<int>{1, 1, 1}, std::vector<int>{2, 2},
+                    std::vector<int>{1, 2}}) {
+                auto x_dim = DDim(std::vector<int64_t>({n, c, h, w}));
+                x.Resize(x_dim);
+                if ((num != 0 && x_dim[axis] % num != 0) ||
+                    (num == 0 && x_dim[axis] % sections.size() != 0))
+                  continue;
+                auto* x_data = x.mutable_data<float>();
+                for (int i = 0; i < x.dims().production(); i++) {
+                  x_data[i] = i;
+                }
+                splite_resize_out(&x, &output, axis, num, sections);
+                splite_resize_out(&x, &output_ref, axis, num, sections);
+                param.x = &x;
+                param.axis = axis;
+                param.num = num;
+                param.sections = &sections;
+                param.output = &output;
+                split.SetParam(param);
+                split.Run();
+                param.output = &output_ref;
+                split_compute_ref<float>(param);
+                for (int i = 0; i < output.size(); i++) {
+                  float* output_data = output[i]->mutable_data<float>();
+                  float* output_ref_data = output_ref[i]->mutable_data<float>();
+                  for (int j = 0; j < output[i]->dims().production(); j++) {
+                    EXPECT_NEAR(output_data[j], output_ref_data[j], 1e-5);
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+TEST(split, retrive_op) {
+  auto split =
+      KernelRegistry::Global().Create<TARGET(kARM), PRECISION(kFloat)>("split");
+  ASSERT_FALSE(split.empty());
+  ASSERT_TRUE(split.front());
+}
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+USE_LITE_KERNEL(split, kARM, kFloat, kNCHW, def);
--- a/paddle/fluid/lite/operators/CMakeLists.txt
+++ b/paddle/fluid/lite/operators/CMakeLists.txt
@@ -19,6 +19,7 @@ cc_library(fill_constant_op_lite SRCS fill_constant_op.cc DEPS ${op_DEPS})
 cc_library(op_params_lite SRCS op_params.cc DEPS ${tensor_lite} any_lite framework_proto_lite)
 cc_library(dropout_op_lite SRCS dropout_op.cc DEPS ${op_DEPS})
 cc_library(concat_op_lite SRCS concat_op.cc DEPS ${op_DEPS})
+cc_library(split_op_lite SRCS split_op.cc DEPS ${op_DEPS})
 set(ops_lite
        conv_op_lite
@@ -38,6 +39,7 @@ set(ops_lite
        activation_ops_lite
        dropout_op_lite
        concat_op_lite
+	split_op_lite
        PARENT_SCOPE)
 lite_cc_test(test_fc_op_lite SRCS fc_op_test.cc 

--- a/paddle/fluid/lite/operators/op_params.h
+++ b/paddle/fluid/lite/operators/op_params.h
@@ -174,6 +174,15 @@ struct DropoutParam {
  std::string dropout_implementation{"downgrade_in_infer"};
 };
+// For Split op
+struct SplitParam {
+  lite::Tensor* x{};
+  std::vector<lite::Tensor*>* output{};
+  int axis{-1};
+  int num{0};
+  std::vector<int>* sections;
+};
 /// ----------------------- element wise operators ----------------------
 struct ElementwiseParam {
  const lite::Tensor* X{};

--- a/paddle/fluid/lite/operators/split_op.cc
+++ b/paddle/fluid/lite/operators/split_op.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/fluid/lite/operators/split_op.h"
+#include "paddle/fluid/lite/core/op_registry.h"
+namespace paddle {
+namespace lite {
+namespace operators {
+bool SplitOp::CheckShape() const {
+  CHECK_OR_FALSE(param_.x);
+  CHECK_OR_FALSE(param_.output);
+  auto x_dims = param_.x->dims();
+  auto x_rank = x_dims.size();
+  CHECK_OR_FALSE(param_.axis >= -static_cast<int>(x_rank) &&
+                 param_.axis < static_cast<int>(x_rank));
+  return true;
+}
+bool SplitOp::InferShape() const {
+  const auto &outs = param_.output;
+  auto in_dims = param_.x.dims();
+  int axis = param_.axis;
+  int num = param_.num;
+  const auto &sections = param_.sections;
+  const int outs_number = outs.size();
+  std::vector<lite::DDimLite> outs_dims;
+  outs_dims.reserve(outs_number);
+  if (num > 0) {
+    int out_axis_dim = in_dims[axis] / num;
+    for (int i = 0; i < outs_number; ++i) {
+      auto dim = in_dims;
+      dim[axis] = out_axis_dim;
+      outs_dims.push_back(dim);
+    }
+  } else if (sections.size() > 0) {
+    for (size_t i = 0; i < outs_number; ++i) {
+      auto dim = in_dims;
+      dim[axis] = sections[i];
+      outs_dims.push_back(dim);
+    }
+  }
+  for (int j = 0; j < outs_dims.size(); ++j) {
+    outs[j]->Resize(outs_dims[j]);
+  }
+  return true;
+}
+bool SplitOp::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) {
+  param_.axis = opdesc.GetAttr<int>("axis");
+  param_.num = opdesc.GetAttr<int>("num");
+  param_.sections = opdesc.GetAttr<std::vector<int>>("sections");
+  param_.x = const_cast<lite::Tensor *>(
+      &scope->FindVar(opdesc.Input("X").front())->Get<lite::Tensor>());
+  auto outs = op_desc.Output("Out");
+  for (auto var : outs) {
+    param_.output.push_back(scope->FindVar(var)->GetMutable<lite::Tensor>());
+  }
+  return true;
+}
+}  // namespace operators
+}  // namespace lite
+}  // namespace paddle
+REGISTER_LITE_OP(softmax, paddle::lite::operators::SoftmaxOp);
--- a/paddle/fluid/lite/operators/split_op.h
+++ b/paddle/fluid/lite/operators/split_op.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include <string>
+#include <vector>
+#include "paddle/fluid/lite/core/op_lite.h"
+#include "paddle/fluid/lite/core/scope.h"
+#include "paddle/fluid/lite/utils/all.h"
+namespace paddle {
+namespace lite {
+namespace operators {
+class SoftmaxOp : public OpLite {
+ public:
+  SplitOp() {}
+  explicit SplitOp(const std::string &op_type) : OpLite(op_type) {}
+  bool CheckShape() const override;
+  bool InferShape() const override;
+  bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
+  void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
+  std::string DebugString() const override { return "split"; }
+ private:
+  mutable SplitParam param_;
+};
+}  // namespace operators
+}  // namespace lite
+}  // namespace paddle