From 63df309138134d40bded1ef6f1dd888f4f7e7546 Mon Sep 17 00:00:00 2001 From: hjchen2 Date: Sat, 30 Mar 2019 21:57:22 +0800 Subject: [PATCH] improve program optimization, add assign_value and flatten2 ops --- src/framework/load_ops.h | 4 + .../program-optimize/program_optimize.cpp | 82 +++++++------------ src/operators/assign_value_op.cpp | 37 +++++++++ src/operators/assign_value_op.h | 33 ++++++++ src/operators/flatten_op.cpp | 13 +-- src/operators/flatten_op.h | 11 ++- .../kernel/arm/assign_value_kernel.cpp | 73 +++++++++++++++++ src/operators/kernel/assign_value_kernel.h | 53 ++++++++++++ src/operators/kernel/beam_search_kernel.h | 3 - src/operators/kernel/one_hot_kernel.h | 3 - src/operators/op_param.h | 6 ++ tools/op.cmake | 4 + 12 files changed, 251 insertions(+), 71 deletions(-) create mode 100644 src/operators/assign_value_op.cpp create mode 100644 src/operators/assign_value_op.h create mode 100644 src/operators/kernel/arm/assign_value_kernel.cpp create mode 100644 src/operators/kernel/assign_value_kernel.h diff --git a/src/framework/load_ops.h b/src/framework/load_ops.h index 983a544cda..88dc901b4c 100644 --- a/src/framework/load_ops.h +++ b/src/framework/load_ops.h @@ -152,6 +152,7 @@ LOAD_OP1(prelu, CPU); #endif #ifdef FLATTEN_OP LOAD_OP1(flatten, CPU); +LOAD_OP1(flatten2, CPU); #endif #ifdef FUSION_CONVBNADDRELU_OP LOAD_OP3(fusion_conv_bn_add_relu, CPU, GPU_CL, FPGA); @@ -331,3 +332,6 @@ LOAD_OP1(pad2d, CPU); #ifdef ONE_HOT_OP LOAD_OP1(one_hot, CPU); #endif +#ifdef ASSIGN_VALUE_OP +LOAD_OP1(assign_value, CPU); +#endif diff --git a/src/framework/program/program-optimize/program_optimize.cpp b/src/framework/program/program-optimize/program_optimize.cpp index ed523a9851..eba27314ad 100644 --- a/src/framework/program/program-optimize/program_optimize.cpp +++ b/src/framework/program/program-optimize/program_optimize.cpp @@ -14,6 +14,7 @@ limitations under the License.
*/ #include "framework/program/program-optimize/program_optimize.h" #include +#include #include "framework/program/program-optimize/fusion_op_register.h" namespace paddle_mobile { @@ -22,7 +23,6 @@ namespace framework { std::shared_ptr ProgramOptimize::FusionOptimize( std::shared_ptr ori_des, bool add_split) { - // ProgramDesc *optimize_program = new ProgramDesc(*ori_des); std::shared_ptr optimize_program = std::make_shared(*ori_des); current_block_ = optimize_program->Blocks().size(); @@ -35,51 +35,35 @@ std::shared_ptr ProgramOptimize::FusionOptimize( std::pair, std::unordered_map>>>> type_map; - - std::unordered_map output_has; - std::vector> nodes; - std::shared_ptr begin_node; + auto block = optimize_program->Block(i); - // DLOG << " ops size: " << block->Ops().size(); for (int j = 0; j < block->Ops().size(); ++j) { auto op = block->Ops()[j]; - auto op_type = op->Type(); - if (op_input_output_key.find(op->Type()) == op_input_output_key.end()) { - LOG(kLOG_ERROR) << "has not support op return null " - << " op type: " << op->Type(); - return nullptr; - } - std::shared_ptr node = std::make_shared(op); - nodes.push_back(node); - - // - type_map[op->Type()].push_back({node, output_nodes}); - if (j == 0) { begin_node = node; } - auto input_keys = op_input_output_key.at(op->Type()).first; - for (auto input_key : input_keys) { - auto op_inputs = op->Input(input_key); - for (int l = 0; l < op_inputs.size(); ++l) { - std::string input_key = op_inputs[l]; - if (output_nodes.find(input_key) != output_nodes.end()) { - auto input_node = output_nodes[input_key]; + const std::string op_type = op->Type(); + nodes.push_back(node); + type_map[op_type].push_back({node, output_nodes}); + const VariableNameMap &op_inputs = op->GetInputs(); + const VariableNameMap &op_outpus = op->GetOutputs(); + + for (const auto &input : op_inputs) { + for (const auto &input_name : input.second) { + if (output_nodes.find(input_name) != output_nodes.end()) { + auto input_node = 
output_nodes[input_name]; *input_node > node; } } } - auto output_keys = op_input_output_key.at(op_type).second; - - for (auto output_key : output_keys) { - auto op_outputs = op->Output(output_key); - for (int k = 0; k < op_outputs.size(); ++k) { - output_nodes[op_outputs[k]] = node; + for (const auto &output : op_outpus) { + for (const auto &output_name : output.second) { + output_nodes[output_name] = node; } } } @@ -97,14 +81,13 @@ std::shared_ptr ProgramOptimize::FusionOptimize( auto depth = matcher->BeginNode().Depth(); auto sub_node = match_node->To(depth); - // DLOG << " sub node: " << *sub_node; + // DLOG << " sub node: " << *sub_node; if (*sub_node == matcher->BeginNode()) { bool can_folder = true; auto relationship_map = sub_node->Relationship(); for (auto to_check : matcher->NeedCheck()) { - // if (node_has) auto nodes = (*sub_node)[to_check.first]; for (auto node : nodes) { auto inputs_to_check = @@ -126,13 +109,8 @@ std::shared_ptr ProgramOptimize::FusionOptimize( continue; } - // DLOG << " match success " << " fusion node: \n" << - // matcher->BeginNode() << "\nsub node: \n" << *sub_node; - // DLOG << "match node\n"<< *match_node; - std::vector> removed_nodes; matcher->FolderNodes(match_node.get(), &removed_nodes); - for (int k = removed_nodes.size() - 1; k >= 0; --k) { auto removed_node = removed_nodes[k]; auto removed_ite = @@ -170,12 +148,12 @@ void ProgramOptimize::GenerateOps( Node *current_node) { if (current_node->inputs_.size() > 1 && input_node != current_node->inputs_.back()) { - DLOG << " current type " << current_node->type_; + DLOG << " current type " << current_node->Type(); DLOG << " inputs size of current node > 0 "; for (int i = 0; i < current_node->inputs_.size(); ++i) { - DLOG << " input i: " << current_node->inputs_[i]->type_; + DLOG << " input i: " << current_node->inputs_[i]->Type(); } return; @@ -201,9 +179,11 @@ void ProgramOptimize::GenerateOps( } bool can_add_split = false; + const auto current_desc = 
current_node->OpDescOfNode(); + const VariableNameMap ¤t_op_inputs = current_desc->GetInputs(); + const VariableNameMap ¤t_op_outputs = current_desc->GetOutputs(); // 如果当前节点有多个输出 并且 只有当前节点对应的 op_desc_ 输出数为 1 时支持 - if (current_node->outputs_.size() > 1 && - op_input_output_key[current_node->op_desc_->type_].second.size() == 1) { + if (current_node->outputs_.size() > 1 && current_op_outputs.size() == 1) { can_add_split = true; // 遍历当前节点的 output 节点 @@ -217,18 +197,15 @@ void ProgramOptimize::GenerateOps( //与节点关联的 OpDesc std::shared_ptr &op_desc = output->op_desc_; - //获取这个 op 的 inputs key 和 outputs key - auto inputs_and_outputs = op_input_output_key[op_desc->type_]; + const VariableNameMap &op_inputs = op_desc->GetInputs(); + const VariableNameMap &op_outputs = op_desc->GetOutputs(); //判断现在 是否存在这个 op //判断这个 output 和 input key 的 size 等于 1 - if (op_input_output_key.find(op_desc->type_) != - op_input_output_key.end() && - inputs_and_outputs.first.size() == 1 && - inputs_and_outputs.second.size() == 1) { - auto inputs_of_output = op_desc->Input(inputs_and_outputs.first[0]); - auto outputs_of_output = op_desc->Output(inputs_and_outputs.second[0]); + if (op_outputs.size() == 1 && op_inputs.size() == 1) { + auto inputs_of_output = op_inputs.begin()->second; + auto outputs_of_output = op_outputs.begin()->second; // 判断一下, 如果输入和输出没有同名, 是支持的 for (int i = 0; i < inputs_of_output.size(); ++i) { @@ -243,7 +220,7 @@ void ProgramOptimize::GenerateOps( } } } else { // 如果模型中包含没有的 op, 则不支持添加 split - DLOG << "找不到 这个 op 类型: " << output->op_desc_->type_; + DLOG << "找不到 这个 op 类型: " << output->op_desc_->Type(); can_add_split = false; } } @@ -312,9 +289,6 @@ void ProgramOptimize::GenerateOps( void ProgramOptimize::GenerateOps( std::vector> *op_descs, Node *begin_node, bool can_add_split) { - // std::vector> *op_desc, - // Node *input_node, Node *current_node, bool adding_thread, int - // thread_num if (can_add_split) { this->GenerateOps(op_descs, begin_node, begin_node, false, -1, nullptr); } 
else { diff --git a/src/operators/assign_value_op.cpp b/src/operators/assign_value_op.cpp new file mode 100644 index 0000000000..49494929de --- /dev/null +++ b/src/operators/assign_value_op.cpp @@ -0,0 +1,37 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef ASSIGN_VALUE_OP + +#include "operators/assign_value_op.h" + +namespace paddle_mobile { +namespace operators { + +template +void AssignValueOp::InferShape() const { + const auto &shape = this->param_.shape_; + this->param_.output_->Resize(framework::make_ddim(shape)); +} + +} // namespace operators +} // namespace paddle_mobile + +namespace ops = paddle_mobile::operators; + +#ifdef PADDLE_MOBILE_CPU +REGISTER_OPERATOR_CPU(assign_value, ops::AssignValueOp); +#endif + +#endif // ASSIGN_VALUE_OP diff --git a/src/operators/assign_value_op.h b/src/operators/assign_value_op.h new file mode 100644 index 0000000000..ce319d333a --- /dev/null +++ b/src/operators/assign_value_op.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef ASSIGN_VALUE_OP + +#pragma once + +#include + +#include "framework/operator.h" +#include "operators/kernel/assign_value_kernel.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +DECLARE_OPERATOR(AssignValue, AssignValueParam, AssignValueKernel); + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/flatten_op.cpp b/src/operators/flatten_op.cpp index 932f780d03..844053b21c 100644 --- a/src/operators/flatten_op.cpp +++ b/src/operators/flatten_op.cpp @@ -31,29 +31,22 @@ void FlattenOp::InferShape() const { "The axis should be greater than or equal to 0."); auto &in_dims = this->param_.InputX()->dims(); - // const auto &in_dims = ctx->GetInputDim("X"); PADDLE_MOBILE_ENFORCE( axis <= in_dims.size(), "The axis should be less than or equal to input tensor's rank."); const auto &out_dims = GetOutputShape(axis, in_dims); this->param_.Out()->Resize(in_dims); - // todo supprot lodtensor - // if (in_dims[0] == out_dims[0]) { - // // Only pass LoD when the first dimension of output and Input(X) - // // are the same. 
- // ctx->ShareLoD("X", "Out"); - // } } } // namespace operators } // namespace paddle_mobile namespace ops = paddle_mobile::operators; + #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(flatten, ops::FlattenOp); -#endif -#ifdef PADDLE_MOBILE_FPGA +REGISTER_OPERATOR_CPU(flatten2, ops::Flatten2Op); #endif -#endif +#endif // FLATTEN_OP diff --git a/src/operators/flatten_op.h b/src/operators/flatten_op.h index daad2d82d8..ef97994dc1 100644 --- a/src/operators/flatten_op.h +++ b/src/operators/flatten_op.h @@ -25,6 +25,7 @@ limitations under the License. */ namespace paddle_mobile { namespace operators { + inline std::vector GetOutputShape(const int axis, const framework::DDim &in_dims) { int64_t outer = 1, inner = 1; @@ -40,7 +41,6 @@ inline std::vector GetOutputShape(const int axis, out_shape[1] = static_cast(inner); return out_shape; } -using paddle_mobile::framework::Tensor; template class FlattenOp : public framework::OperatorWithKernel< @@ -56,6 +56,15 @@ class FlattenOp : public framework::OperatorWithKernel< void InferShape() const override; }; +template +class Flatten2Op : public FlattenOp { + public: + Flatten2Op(const std::string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, framework::Scope *scope) + : FlattenOp(type, inputs, outputs, attrs, scope) {} +}; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/arm/assign_value_kernel.cpp b/src/operators/kernel/arm/assign_value_kernel.cpp new file mode 100644 index 0000000000..7390f77ed1 --- /dev/null +++ b/src/operators/kernel/arm/assign_value_kernel.cpp @@ -0,0 +1,73 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef ASSIGN_VALUE_OP + +#include "operators/kernel/assign_value_kernel.h" +#include "framework/data_type.h" + +namespace paddle_mobile { +namespace operators { + +struct AssignValueOpFunctor { + framework::LoDTensor* output_; + const std::vector shape_; + const std::vector int32_values_; + const std::vector fp32_values_; + + AssignValueOpFunctor(framework::LoDTensor* output, + const std::vector& shape, + const std::vector& fp32_values, + const std::vector& int32_values) + : output_(output), + shape_(shape), + int32_values_(int32_values), + fp32_values_(fp32_values) {} + + template + inline void apply() const { + PADDLE_MOBILE_THROW_EXCEPTION("Assign value: not supported data type."); + } +}; + +template <> +inline void AssignValueOpFunctor::apply() const { + framework::TensorFromVector(int32_values_, output_); + output_->Resize(framework::make_ddim(shape_)); +} + +template <> +inline void AssignValueOpFunctor::apply() const { + framework::TensorFromVector(fp32_values_, output_); + output_->Resize(framework::make_ddim(shape_)); +} + +template <> +bool AssignValueKernel::Init(AssignValueParam* param) { + return true; +} + +template <> +void AssignValueKernel::Compute( + const AssignValueParam& param) { + framework::VisitDataType( + framework::ToDataType(param.dtype_), + AssignValueOpFunctor(param.output_, param.shape_, param.fp32_values_, + param.int32_values_)); +} + +} // namespace operators +} // namespace paddle_mobile + +#endif // ASSIGN_VALUE_OP diff --git a/src/operators/kernel/assign_value_kernel.h b/src/operators/kernel/assign_value_kernel.h new file mode 
100644 index 0000000000..5fae921876 --- /dev/null +++ b/src/operators/kernel/assign_value_kernel.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef ASSIGN_VALUE_OP + +#pragma once + +#include +#include "framework/operator.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +template +class AssignValueParam : public OpParam { + public: + AssignValueParam(const VariableNameMap &inputs, + const VariableNameMap &outputs, const AttributeMap &attrs, + Scope *scope) + : OpParam(inputs, outputs, attrs, scope) { + output_ = GET_VAR_AS_LOD_TENSOR("Out", outputs, *scope); + shape_ = OpParam::GetAttr>("shape", attrs); + fp32_values_ = OpParam::GetAttr>("fp32_values", attrs); + int32_values_ = OpParam::GetAttr>("int32_values", attrs); + dtype_ = OpParam::GetAttr("dtype", attrs); + } + + public: + framework::LoDTensor *output_; + std::vector shape_; + std::vector fp32_values_; + std::vector int32_values_; + int dtype_; +}; + +DECLARE_KERNEL(AssignValue, AssignValueParam); + +} // namespace operators +} // namespace paddle_mobile + +#endif // ASSIGN_VALUE_OP diff --git a/src/operators/kernel/beam_search_kernel.h b/src/operators/kernel/beam_search_kernel.h index 38fe162b24..bb4a3ced17 100644 --- a/src/operators/kernel/beam_search_kernel.h +++ b/src/operators/kernel/beam_search_kernel.h @@ -22,9 +22,6 @@ limitations under the License. 
*/ namespace paddle_mobile { namespace operators { -#define GET_VAR_AS_LOD_TENSOR(name, name_dict, scope) \ - OpParam::GetVarValue(name, name_dict, scope) - template class BeamSearchParam : public OpParam { public: diff --git a/src/operators/kernel/one_hot_kernel.h b/src/operators/kernel/one_hot_kernel.h index fd883cabee..2cb2e59eb3 100644 --- a/src/operators/kernel/one_hot_kernel.h +++ b/src/operators/kernel/one_hot_kernel.h @@ -22,9 +22,6 @@ limitations under the License. */ namespace paddle_mobile { namespace operators { -#define GET_VAR_AS_LOD_TENSOR(name, name_dict, scope) \ - OpParam::GetVarValue(name, name_dict, scope) - template class OnehotParam : public OpParam { public: diff --git a/src/operators/op_param.h b/src/operators/op_param.h index e144bf623f..52d6f6693e 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -434,6 +434,12 @@ class OpParam { } }; +#define GET_VAR_AS_TENSOR(name, name_dict, scope) \ + OpParam::GetVarValue(name, name_dict, scope) + +#define GET_VAR_AS_LOD_TENSOR(name, name_dict, scope) \ + OpParam::GetVarValue(name, name_dict, scope) + template class ConvParam : public OpParam { typedef typename DtypeTensorTrait::gtype GType; diff --git a/tools/op.cmake b/tools/op.cmake index 94dc0b4215..209df1dd7d 100755 --- a/tools/op.cmake +++ b/tools/op.cmake @@ -320,6 +320,7 @@ if(NOT FOUND_MATCH) set(BEAM_SEARCH_DECODE_OP ON) set(PAD2D_OP ON) set(ONE_HOT_OP ON) + set(ASSIGN_VALUE_OP ON) endif() # option(BATCHNORM_OP "" ON) @@ -646,3 +647,6 @@ endif() if (ONE_HOT_OP) add_definitions(-DONE_HOT_OP) endif() +if (ASSIGN_VALUE_OP) + add_definitions(-DASSIGN_VALUE_OP) +endif() -- GitLab