Commit 7a3de8ab authored by xiebaiyuan, committed by GitHub

Merge pull request #940 from xiebaiyuan/develop

add fssd ops impls #924
......@@ -59,6 +59,7 @@ template <typename Dtype>
void OperatorBase<Dtype>::Run() const {
RunImpl();
#ifdef PADDLE_MOBILE_DEBUG
DLOG << "-------------" << type_ << "----------------------------";
vector<string> input_keys = GetInputKeys();
for (const auto &key : input_keys) {
Tensor *input = GetVarValue<framework::LoDTensor>(key, inputs_, *scope_);
......
......@@ -73,6 +73,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
#ifdef PADDLE_EXECUTOR_MULTITHREAD
depManager.resize(blocks.size());
#endif
DLOG << "executer in loaddable mode: " << loddable_;
for (int i = 0; i < blocks.size(); ++i) {
std::shared_ptr<framework::BlockDesc> block_desc = blocks[i];
std::vector<std::shared_ptr<framework::OpDesc>> ops = block_desc->Ops();
......@@ -82,7 +83,6 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
auto op_base = framework::OpRegistry<Dtype>::CreateOp(
op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
program_.scope);
DLOG << "executer in loaddable mode: " << loddable_;
// Use pre-InferShape to resize ahead of time, but a LoD-mode tensor
// must instead be resized at runtime.
if (!loddable_) {
......@@ -176,6 +176,7 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
type_size = 8;
break;
case framework::VARTYPE_TYPE_INT32:
memory = tensor->mutable_data<int32_t>();
type_size = 4;
break;
case framework::VARTYPE_TYPE_INT64:
......@@ -308,6 +309,9 @@ bool Executor<Dtype, P>::varInputMemory(
}
case framework::VARTYPE_TYPE_INT32: {
tensor = var->template GetMutable<framework::LoDTensor>();
tensor->template mutable_data<int32_t>();
is_mute_match = true;
break;
}
......
......@@ -20,8 +20,25 @@ namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
void BilinearOp<DeviceType, T>::InferShape() const {
// todo check
this->param_.Out()->Resize(this->param_.InputX()->dims());
PADDLE_MOBILE_ENFORCE(this->param_.InputX() != nullptr,
"Input(X) of BilinearInterOp should not be null.");
PADDLE_MOBILE_ENFORCE(this->param_.Out() != nullptr,
"Output(Out) of BilinearInterOp should not be null.");
auto dim_x = this->param_.InputX()->dims(); // NCHW format
int out_h = this->param_.OutH();
int out_w = this->param_.OutW();
PADDLE_MOBILE_ENFORCE(dim_x.size() == 4, "X's dimension must be 4");
if (this->param_.InputOutPutSize() != nullptr) {
auto out_size_dim = this->param_.InputOutPutSize()->dims();
PADDLE_MOBILE_ENFORCE(out_size_dim.size() == 1,
"OutSize's dimension size must be 1");
PADDLE_MOBILE_ENFORCE(out_size_dim[0] == 2, "OutSize's dim[0] must be 2");
}
std::vector<int64_t> dim_out({dim_x[0], dim_x[1], out_h, out_w});
this->param_.Out()->Resize(framework::make_ddim(dim_out));
}
} // namespace operators
......
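A worked example of the shape inference above (illustrative values, not part of the patch):

// X is NCHW {1, 3, 32, 32}; attrs out_h = 64, out_w = 64:
//   dim_out = {N, C, out_h, out_w} = {1, 3, 64, 64}
// If the optional OutSize input is present, its two int values override
// out_h/out_w at kernel time (see BilinearInterpCompute below).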
......@@ -18,10 +18,32 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
void FlattenOp<DeviceType, T>::InferShape() const {
// todo check
this->param_.Out()->Resize(this->param_.InputX()->dims());
PADDLE_MOBILE_ENFORCE(this->param_.InputX() != nullptr,
"Input (X) of Flatten op should not be null.");
PADDLE_MOBILE_ENFORCE(this->param_.Out() != nullptr,
"Output (Out) of Flatten op should not be null.");
auto &axis = this->param_.Axis();
PADDLE_MOBILE_ENFORCE(axis >= 0,
"The axis should be greater than or equal to 0.");
auto &in_dims = this->param_.InputX()->dims();
PADDLE_MOBILE_ENFORCE(
axis <= in_dims.size(),
"The axis should be less than or equal to input tensor's rank.");
const auto &out_dims = GetOutputShape(axis, in_dims);
this->param_.Out()->Resize(framework::make_ddim(
std::vector<int64_t>(out_dims.begin(), out_dims.end())));
// TODO: support LoDTensor
// if (in_dims[0] == out_dims[0]) {
// // Only pass LoD when the first dimension of output and Input(X)
// // are the same.
// ctx->ShareLoD("X", "Out");
// }
}
} // namespace operators
......
......@@ -24,7 +24,21 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
inline std::vector<int32_t> GetOutputShape(const int axis,
const framework::DDim &in_dims) {
int64_t outer = 1, inner = 1;
for (int i = 0; i < in_dims.size(); ++i) {
if (i < axis) {
outer *= in_dims[i];
} else {
inner *= in_dims[i];
}
}
std::vector<int32_t> out_shape(2);
out_shape[0] = static_cast<int>(outer);
out_shape[1] = static_cast<int>(inner);
return out_shape;
}
using paddle_mobile::framework::Tensor;
template <typename DeviceType, typename T>
......
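A minimal usage sketch of GetOutputShape (the dims are illustrative; framework::make_ddim is the same helper this patch already uses):

inline void flatten_shape_example() {
std::vector<int64_t> dims_v({2, 3, 4, 5});
framework::DDim in_dims = framework::make_ddim(dims_v);
std::vector<int32_t> shape = GetOutputShape(/*axis=*/2, in_dims);
// outer = 2 * 3 = 6, inner = 4 * 5 = 20, so shape == {6, 20}
}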
......@@ -22,7 +22,68 @@ namespace paddle_mobile {
namespace operators {
template <typename P>
void BilinearInterpCompute(const BilinearInterpParam<CPU>& param) {}
void BilinearInterpCompute(const BilinearInterpParam<CPU>& param) {
auto out_dims = param.Out()->dims();
auto* input = param.InputX()->data<float>();
auto out_size_t = param.InputOutPutSize();
int out_h = param.OutH();
int out_w = param.OutW();
if (out_size_t != nullptr) {
auto out_size_data = out_size_t->data<int>();
out_h = out_size_data[0];
out_w = out_size_data[1];
}
auto* output = param.Out()->mutable_data<float>(
{out_dims[0], out_dims[1], out_h, out_w});
auto batch_size = param.InputX()->dims()[0];
auto channels = param.InputX()->dims()[1];
auto in_h = param.InputX()->dims()[2];
auto in_w = param.InputX()->dims()[3];
auto in_hw = in_h * in_w;
auto out_hw = out_h * out_w;
auto in_chw = channels * in_hw;
auto out_chw = channels * out_hw;
float ratio_h =
(out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
float ratio_w =
(out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
if (in_h == out_h && in_w == out_w) {
memcpy(output, input, param.InputX()->numel() * sizeof(float));
} else {
for (int k = 0; k < batch_size; ++k) { // loop for batches
for (int i = 0; i < out_h; ++i) { // loop for images
int h = ratio_h * i;
int hid = (h < in_h - 1) ? 1 : 0;
float h1lambda = ratio_h * i - h;
float h2lambda = 1.f - h1lambda;
for (int j = 0; j < out_w; ++j) {
int w = ratio_w * j;
int wid = (w < in_w - 1) ? 1 : 0;
float w1lambda = ratio_w * j - w;
float w2lambda = 1.f - w1lambda;
// calculate the four positions used for bilinear interpolation
const float* in_pos = &input[k * in_chw + h * in_w + w];
float* out_pos = &output[k * out_chw + i * out_w + j];
for (int c = 0; c < channels; ++c) { // loop for channels
// bilinear interpolation
out_pos[0] = static_cast<float>(
h2lambda * (w2lambda * in_pos[0] + w1lambda * in_pos[wid]) +
h1lambda * (w2lambda * in_pos[hid * in_w] +
w1lambda * in_pos[hid * in_w + wid]));
in_pos += in_hw;
out_pos += out_hw;
}
}
}
}
}
}
} // namespace operators
} // namespace paddle_mobile
......
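A worked example of the interpolation arithmetic above (illustrative sizes): with in_h = in_w = 2 and out_h = out_w = 3, ratio_h = ratio_w = (2 - 1) / (3 - 1) = 0.5. For output pixel (i, j) = (1, 1): h = w = 0, hid = wid = 1, and h1lambda = w1lambda = 0.5, so

// out(1, 1) = 0.25 * (in(0, 0) + in(0, 1) + in(1, 0) + in(1, 1))

i.e. the centre output pixel is the average of the four surrounding input pixels, as expected for bilinear upsampling.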
......@@ -15,14 +15,29 @@ limitations under the License. */
#ifdef FLATTEN_OP
#pragma once
#include "operators/kernel/reshape_kernel.h"
#include <vector>
#include "operators/flatten_op.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename P>
void FlattenCompute(const FlattenParam<CPU>& param) {}
void FlattenCompute(const FlattenParam<CPU> &param) {
const auto *input_x = param.InputX();
const auto axis = param.Axis();
const auto &input_x_dims = input_x->dims();
auto *out = param.Out();
const auto &out_shape_v = GetOutputShape(axis, input_x_dims);
const framework::DDim &out_dim = ValidateShape(out_shape_v, input_x_dims);
out->Resize(out_dim);
out->mutable_data<float>();
framework::TensorCopy(*input_x, out);
// TensorCopy also copies the source dims; restore the flattened shape.
out->Resize(out_dim);
}
} // namespace operators
} // namespace paddle_mobile
......
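An illustrative trace of FlattenCompute (made-up dims):

// input {2, 3, 4, 5}, axis = 2 -> out_shape_v = {6, 20}, out_dim = {6, 20}
// 120 floats are copied verbatim; the trailing Resize re-applies {6, 20}
// because TensorCopy also copies the source dims onto out.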
......@@ -22,7 +22,15 @@ namespace paddle_mobile {
namespace operators {
template <typename P>
void ShapeCompute(const ShapeParam<CPU>& param) {}
void ShapeCompute(const ShapeParam<CPU>& param) {
auto* in_t = param.Input();
auto* out_t = param.Out();
auto out_data = out_t->mutable_data<int32_t>();
auto in_dims = in_t->dims();
for (int i = 0; i < in_dims.size(); ++i) {
out_data[i] = static_cast<int32_t>(in_dims[i]);
}
}
} // namespace operators
} // namespace paddle_mobile
......
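A short trace of the shape op (illustrative dims; the matching InferShape appears later in this commit):

// input dims {8, 3, 224, 224} -> Out is a 1-D int32 tensor of length 4
// holding the values {8, 3, 224, 224}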
......@@ -21,8 +21,64 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
// Strided numel memory copy from src to dst by the specified axis
//
// For example, for a tensor of dims [4, 20, 100], the strided numel is
// [8000, 2000, 100]
//
// NOTE: src and dst should have the same dims, except along the
// specified axis.
template <typename T>
inline void StridedNumelCopyWithAxis(int64_t axis, T* dst,
const framework::DDim& dst_stride_numel,
const T* src,
const framework::DDim& src_stride_numel,
int64_t size) {
int64_t before = dst_stride_numel[0] / dst_stride_numel[axis];
int64_t src_after = src_stride_numel[axis];
int64_t dst_after = dst_stride_numel[axis];
PADDLE_MOBILE_ENFORCE(src_stride_numel.size() == dst_stride_numel.size(),
"src and dst tensor should have the same dims size.");
for (int64_t i = 0; i < src_stride_numel.size(); ++i) {
if (i < axis) {
PADDLE_MOBILE_ENFORCE(src_stride_numel[i] / src_stride_numel[axis] ==
dst_stride_numel[i] / dst_stride_numel[axis],
"src and dst should have the same elements "
"except the specified axis.");
} else if (i == axis) {
continue;
} else {
PADDLE_MOBILE_ENFORCE(src_stride_numel[i] == dst_stride_numel[i],
"src and dst should have the same elements "
"except the specified axis.");
}
}
for (int64_t i = 0; i < before; ++i) {
memory::Copy(dst + i * dst_after, src + i * src_after, sizeof(T) * size);
}
}
template <typename P>
void SplitCompute(const SplitParam<CPU>& param) {}
void SplitCompute(const SplitParam<CPU>& param) {
auto* in = param.InputX();
auto outs = param.Outs();
auto in_stride = framework::stride_numel(in->dims());
int64_t axis = param.Axis();
size_t input_offset = 0;
for (auto& out : outs) {
out->mutable_data<float>();
auto out_stride = framework::stride_numel(out->dims());
StridedNumelCopyWithAxis<float>(axis, out->data<float>(), out_stride,
in->data<float>() + input_offset, in_stride,
out_stride[axis]);
input_offset += out_stride[axis];
}
}
} // namespace operators
} // namespace paddle_mobile
......
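A worked example of the strided copy above (illustrative dims): splitting input {4, 20, 100} along axis = 1 into two outputs of {4, 10, 100}:

// in_stride  = stride_numel({4, 20, 100}) = {8000, 2000, 100}
// out_stride = stride_numel({4, 10, 100}) = {4000, 1000, 100}
// before = 4000 / 1000 = 4, src_after = 2000, dst_after = 1000
// each output copies 4 runs of size = out_stride[1] = 1000 floats;
// input_offset then advances by 1000, so the second output reads the
// second half of each 2000-float source slice.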
......@@ -245,6 +245,12 @@ class OpParam {
return GetVarValue<T>("Out", outputs, scope);
}
template <typename T>
static vector<T *> OutMultiFrom(const VariableNameMap &outputs,
const Scope &scope) {
return GetMultiVarValue<T>("Out", outputs, scope);
}
template <typename T>
static T *OutputYFrom(const VariableNameMap &outputs, const Scope &scope) {
return GetVarValue<T>("Y", outputs, scope);
......@@ -2248,13 +2254,16 @@ class FlattenParam : public OpParam {
const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
axis = GetAttr<int>("axis", attrs);
}
const RType *InputX() const { return input_x_; }
RType *Out() const { return out_; }
const int &Axis() const { return axis; }
private:
RType *input_x_;
RType *out_;
int axis;
};
#endif
......@@ -2268,14 +2277,29 @@ class SplitParam : public OpParam {
SplitParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
outs_ = OutMultiFrom<GType>(outputs, scope);
axis = GetAttr<int>("axis", attrs);
num = GetAttr<int>("num", attrs);
sections = GetAttr<std::vector<int>>("sections", attrs);
}
const RType *InputX() const { return input_x_; }
RType *Out() const { return out_; }
std::vector<GType *> Outs() const { return outs_; }
int Axis() const { return axis; }
int Num() const { return num; }
std::vector<int> Sections() const { return sections; }
private:
RType *input_x_;
RType *out_;
std::vector<GType *> outs_;
int axis;
int num;
std::vector<int> sections;
};
#endif
......@@ -2292,14 +2316,21 @@ class BilinearInterpParam : public OpParam {
input_x_ = InputXFrom<GType>(inputs, scope);
input_outsize_ = InputOutSizeFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
out_h_ = GetAttr<int>("out_h", attrs);
out_w_ = GetAttr<int>("out_w", attrs);
}
const RType *InputX() const { return input_x_; }
const RType *InputOutPutSize() const { return input_outsize_; }
RType *Out() const { return out_; }
int OutH() const { return out_h_; }
int OutW() const { return out_w_; }
private:
RType *input_x_;
RType *input_outsize_;
RType *out_;
int out_h_;
int out_w_;
};
#endif
......@@ -2315,7 +2346,7 @@ class ShapeParam : public OpParam {
input_ = InputFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
}
const RType *InputX() const { return input_; }
const RType *Input() const { return input_; }
RType *Out() const { return out_; }
private:
......
......@@ -20,7 +20,11 @@ namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
void ShapeOp<DeviceType, T>::InferShape() const {
this->param_.Out()->Resize(this->param_.InputX()->dims());
PADDLE_MOBILE_ENFORCE(this->param_.Input() != nullptr,
"Input (Input) of get_shape op should not be null.");
PADDLE_MOBILE_ENFORCE(this->param_.Out() != nullptr,
"Output (Out) of get_shape op should not be null.");
this->param_.Out()->Resize({this->param_.Input()->dims().size()});
}
} // namespace operators
......
......@@ -18,9 +18,62 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
void SplitOp<DeviceType, T>::InferShape() const {
this->param_.Out()->Resize(this->param_.InputX()->dims());
PADDLE_MOBILE_ENFORCE(this->param_.InputX() != nullptr,
"Input(X) of SplitOp should not be null.");
const auto &outs = this->param_.Outs();
PADDLE_MOBILE_ENFORCE(outs.size() >= 1UL,
"Outputs(Out) of SplitOp should not be empty.");
auto in_dims = this->param_.InputX()->dims();
size_t axis = static_cast<size_t>(this->param_.Axis());
size_t num = static_cast<size_t>(this->param_.Num());
const auto &sections = this->param_.Sections();
const size_t outs_number = outs.size();
std::vector<framework::DDim> outs_dims;
outs_dims.reserve(outs_number);
if (num > 0) {
int64_t in_axis_dim = in_dims[axis];
PADDLE_MOBILE_ENFORCE(in_axis_dim % num == 0,
"tensor split does not result"
" in an equal division");
size_t out_axis_dim = in_axis_dim / num;
for (size_t i = 0; i < outs_number; ++i) {
auto dim = in_dims;
dim[axis] = out_axis_dim;
outs_dims.push_back(dim);
}
} else if (sections.size() > 0) {
PADDLE_MOBILE_ENFORCE(sections.size() == outs_number,
"tensor split sections size"
"should be equal to output size.");
for (size_t i = 0; i < outs_number; ++i) {
auto dim = in_dims;
dim[axis] = sections[i];
outs_dims.push_back(dim);
}
}
PADDLE_MOBILE_ENFORCE(outs_dims.size() == outs.size(),
"outs_dims.size() must equal the number of outputs!");
for (size_t j = 0; j < outs_dims.size(); ++j) {
outs[j]->Resize(outs_dims[j]);
}
// TODO: LoD impl
// if (axis != 0) {
//   // Only pass LoD when not splitting along the first dim.
// for (size_t i = 0; i < outs_number; ++i) {
// ctx->ShareLoD("X", "Out", 0, i);
// }
// }
}
} // namespace operators
......
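Worked examples of the two branches above (illustrative dims), for input {4, 20, 100} with axis = 1:

// num = 2:            in_axis_dim = 20, out_axis_dim = 10
//                     -> two outputs resized to {4, 10, 100}
// sections = {5, 15}: -> outputs resized to {4, 5, 100} and {4, 15, 100}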
......@@ -44,7 +44,6 @@ class SplitOp : public framework::OperatorWithKernel<
operators::SplitKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
};
} // namespace operators
} // namespace paddle_mobile
......
......@@ -23,7 +23,7 @@ int main() {
// ../../../test/models/mobilenet
auto time1 = time();
if (paddle_mobile.Load(std::string(g_fluid_fssd_new) + "/model",
std::string(g_fluid_fssd_new) + "/params", false)) {
std::string(g_fluid_fssd_new) + "/params", true)) {
auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
......