Commit 3f442466 authored by hjchen2

improve program optimize, add assign value and flatten2 op

Parent 8eb2ae94
@@ -152,6 +152,7 @@ LOAD_OP1(prelu, CPU);
 #endif
 #ifdef FLATTEN_OP
 LOAD_OP1(flatten, CPU);
+LOAD_OP1(flatten2, CPU);
 #endif
 #ifdef FUSION_CONVBNADDRELU_OP
 LOAD_OP3(fusion_conv_bn_add_relu, CPU, GPU_CL, FPGA);
@@ -331,3 +332,6 @@ LOAD_OP1(pad2d, CPU);
 #ifdef ONE_HOT_OP
 LOAD_OP1(one_hot, CPU);
 #endif
+#ifdef ASSIGN_VALUE_OP
+LOAD_OP1(assign_value, CPU);
+#endif
@@ -14,6 +14,7 @@ limitations under the License. */
 #include "framework/program/program-optimize/program_optimize.h"
 #include <algorithm>
+#include <utility>
 #include "framework/program/program-optimize/fusion_op_register.h"

 namespace paddle_mobile {
@@ -22,7 +23,6 @@ namespace framework {
 std::shared_ptr<ProgramDesc> ProgramOptimize::FusionOptimize(
     std::shared_ptr<ProgramDesc> ori_des, bool add_split) {
-  // ProgramDesc *optimize_program = new ProgramDesc(*ori_des);
   std::shared_ptr<ProgramDesc> optimize_program =
       std::make_shared<ProgramDesc>(*ori_des);
   current_block_ = optimize_program->Blocks().size();
@@ -35,51 +35,35 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FusionOptimize(
           std::pair<std::shared_ptr<Node>,
                     std::unordered_map<std::string, std::shared_ptr<Node>>>>>
         type_map;
-    std::unordered_map<std::string, bool> output_has;
     std::vector<std::shared_ptr<Node>> nodes;
     std::shared_ptr<Node> begin_node;
     auto block = optimize_program->Block(i);
-    // DLOG << " ops size: " << block->Ops().size();
     for (int j = 0; j < block->Ops().size(); ++j) {
       auto op = block->Ops()[j];
-      auto op_type = op->Type();
-      if (op_input_output_key.find(op->Type()) == op_input_output_key.end()) {
-        LOG(kLOG_ERROR) << "has not support op return null "
-                        << " op type: " << op->Type();
-        return nullptr;
-      }
       std::shared_ptr<Node> node = std::make_shared<Node>(op);
-      nodes.push_back(node);
-      //
-      type_map[op->Type()].push_back({node, output_nodes});
       if (j == 0) {
         begin_node = node;
       }
-      auto input_keys = op_input_output_key.at(op->Type()).first;
-      for (auto input_key : input_keys) {
-        auto op_inputs = op->Input(input_key);
-        for (int l = 0; l < op_inputs.size(); ++l) {
-          std::string input_key = op_inputs[l];
-          if (output_nodes.find(input_key) != output_nodes.end()) {
-            auto input_node = output_nodes[input_key];
+      const std::string op_type = op->Type();
+      nodes.push_back(node);
+      type_map[op_type].push_back({node, output_nodes});
+      const VariableNameMap &op_inputs = op->GetInputs();
+      const VariableNameMap &op_outpus = op->GetOutputs();
+      for (const auto &input : op_inputs) {
+        for (const auto &input_name : input.second) {
+          if (output_nodes.find(input_name) != output_nodes.end()) {
+            auto input_node = output_nodes[input_name];
             *input_node > node;
           }
         }
       }
-      auto output_keys = op_input_output_key.at(op_type).second;
-      for (auto output_key : output_keys) {
-        auto op_outputs = op->Output(output_key);
-        for (int k = 0; k < op_outputs.size(); ++k) {
-          output_nodes[op_outputs[k]] = node;
+      for (const auto &output : op_outpus) {
+        for (const auto &output_name : output.second) {
+          output_nodes[output_name] = node;
         }
       }
     }
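Note on the rewritten loop above: it builds the fusion graph directly from each op's VariableNameMap instead of consulting the old op_input_output_key table. Every output variable name is recorded as being produced by the current node, and any input whose name already has a producer gets an edge from that producer (the `*input_node > node` overload). A minimal self-contained sketch of this def-use linking, with simplified stand-ins for the framework's Node and VariableNameMap types:

#include <map>
#include <string>
#include <vector>

// Simplified stand-ins for framework::Node and VariableNameMap.
struct Node {
  std::vector<Node *> outputs_;  // consumers of this node's outputs
};
using VariableNameMap = std::map<std::string, std::vector<std::string>>;

// Record this op's outputs and connect its inputs to earlier producers.
void LinkNode(const VariableNameMap &op_inputs,
              const VariableNameMap &op_outputs, Node *node,
              std::map<std::string, Node *> *output_nodes) {
  for (const auto &input : op_inputs) {
    for (const auto &name : input.second) {
      auto it = output_nodes->find(name);
      if (it != output_nodes->end()) {
        it->second->outputs_.push_back(node);  // producer -> consumer edge
      }
    }
  }
  for (const auto &output : op_outputs) {
    for (const auto &name : output.second) {
      (*output_nodes)[name] = node;  // the latest producer wins
    }
  }
}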
@@ -97,14 +81,13 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FusionOptimize(
         auto depth = matcher->BeginNode().Depth();
         auto sub_node = match_node->To(depth);
         // DLOG << " sub node: " << *sub_node;
         if (*sub_node == matcher->BeginNode()) {
           bool can_folder = true;
           auto relationship_map = sub_node->Relationship();
           for (auto to_check : matcher->NeedCheck()) {
-            // if (node_has)
             auto nodes = (*sub_node)[to_check.first];
             for (auto node : nodes) {
               auto inputs_to_check =
@@ -126,13 +109,8 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FusionOptimize(
             continue;
           }
-          // DLOG << " match success " << " fusion node: \n" <<
-          // matcher->BeginNode() << "\nsub node: \n" << *sub_node;
-          // DLOG << "match node\n"<< *match_node;
           std::vector<std::shared_ptr<Node>> removed_nodes;
           matcher->FolderNodes(match_node.get(), &removed_nodes);
           for (int k = removed_nodes.size() - 1; k >= 0; --k) {
             auto removed_node = removed_nodes[k];
             auto removed_ite =
@@ -170,12 +148,12 @@ void ProgramOptimize::GenerateOps(
     Node *current_node) {
   if (current_node->inputs_.size() > 1 &&
       input_node != current_node->inputs_.back()) {
-    DLOG << " current type " << current_node->type_;
+    DLOG << " current type " << current_node->Type();
     DLOG << " inputs size of current node > 0 ";
     for (int i = 0; i < current_node->inputs_.size(); ++i) {
-      DLOG << " input i: " << current_node->inputs_[i]->type_;
+      DLOG << " input i: " << current_node->inputs_[i]->Type();
     }
     return;
@@ -201,9 +179,11 @@ void ProgramOptimize::GenerateOps(
   }

   bool can_add_split = false;
+  const auto current_desc = current_node->OpDescOfNode();
+  const VariableNameMap &current_op_inputs = current_desc->GetInputs();
+  const VariableNameMap &current_op_outputs = current_desc->GetOutputs();
   // Supported only when the current node has multiple outputs and the
   // op_desc_ of the current node has exactly one output key
-  if (current_node->outputs_.size() > 1 &&
-      op_input_output_key[current_node->op_desc_->type_].second.size() == 1) {
+  if (current_node->outputs_.size() > 1 && current_op_outputs.size() == 1) {
     can_add_split = true;

     // Iterate over the output nodes of the current node
@@ -217,18 +197,15 @@ void ProgramOptimize::GenerateOps(
       // The OpDesc associated with the node
       std::shared_ptr<framework::OpDesc> &op_desc = output->op_desc_;
       // Get the inputs key and outputs key of this op
-      auto inputs_and_outputs = op_input_output_key[op_desc->type_];
+      const VariableNameMap &op_inputs = op_desc->GetInputs();
+      const VariableNameMap &op_outputs = op_desc->GetOutputs();
      // Check whether this op is known,
      // and that its input key size and output key size are both 1
-      if (op_input_output_key.find(op_desc->type_) !=
-              op_input_output_key.end() &&
-          inputs_and_outputs.first.size() == 1 &&
-          inputs_and_outputs.second.size() == 1) {
-        auto inputs_of_output = op_desc->Input(inputs_and_outputs.first[0]);
-        auto outputs_of_output = op_desc->Output(inputs_and_outputs.second[0]);
+      if (op_outputs.size() == 1 && op_inputs.size() == 1) {
+        auto inputs_of_output = op_inputs.begin()->second;
+        auto outputs_of_output = op_outputs.begin()->second;

       // If no input and output share a name, splitting is supported
       for (int i = 0; i < inputs_of_output.size(); ++i) {
@@ -243,7 +220,7 @@ void ProgramOptimize::GenerateOps(
         }
       }
     } else {  // If the model contains an unknown op, adding split is not supported
-      DLOG << "Cannot find this op type: " << output->op_desc_->type_;
+      DLOG << "Cannot find this op type: " << output->op_desc_->Type();
       can_add_split = false;
     }
   }
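With op_input_output_key gone, the split check above reduces to: the candidate consumer has exactly one input key and one output key, and no variable name appears on both sides. A compact sketch of that collision test, under the same simplified VariableNameMap alias as the earlier sketch:

#include <algorithm>
#include <map>
#include <string>
#include <vector>

using VariableNameMap = std::map<std::string, std::vector<std::string>>;

// True if a split node may be inserted before this op: one input key,
// one output key, and no name shared between inputs and outputs.
bool SplitSupported(const VariableNameMap &op_inputs,
                    const VariableNameMap &op_outputs) {
  if (op_inputs.size() != 1 || op_outputs.size() != 1) return false;
  const auto &ins = op_inputs.begin()->second;
  const auto &outs = op_outputs.begin()->second;
  for (const auto &in : ins) {
    if (std::find(outs.begin(), outs.end(), in) != outs.end()) return false;
  }
  return true;
}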
@@ -312,9 +289,6 @@ void ProgramOptimize::GenerateOps(
 void ProgramOptimize::GenerateOps(
     std::vector<std::shared_ptr<framework::OpDesc>> *op_descs, Node *begin_node,
     bool can_add_split) {
-  //  std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
-  //  Node *input_node, Node *current_node, bool adding_thread, int
-  //  thread_num
   if (can_add_split) {
     this->GenerateOps(op_descs, begin_node, begin_node, false, -1, nullptr);
   } else {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ASSIGN_VALUE_OP
#include "operators/assign_value_op.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void AssignValueOp<Dtype, T>::InferShape() const {
const auto &shape = this->param_.shape_;
this->param_.output_->Resize(framework::make_ddim(shape));
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(assign_value, ops::AssignValueOp);
#endif
#endif // ASSIGN_VALUE_OP
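InferShape above sizes Out purely from the shape attribute, while the kernel later copies in a flat value vector; the two are consistent only when the shape's element count matches the vector length. A small sanity check the op itself does not perform, shown only to make that relationship concrete:

#include <cstddef>
#include <numeric>
#include <vector>

// numel(shape) must equal the number of values the kernel will copy;
// e.g. shape = {2, 3} pairs with six fp32_values.
bool ShapeMatchesValues(const std::vector<int> &shape, std::size_t num_values) {
  const long long numel =
      std::accumulate(shape.begin(), shape.end(), 1LL,
                      [](long long acc, int d) { return acc * d; });
  return numel == static_cast<long long>(num_values);
}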
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ASSIGN_VALUE_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/assign_value_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
DECLARE_OPERATOR(AssignValue, AssignValueParam, AssignValueKernel);
} // namespace operators
} // namespace paddle_mobile
#endif
@@ -31,29 +31,22 @@ void FlattenOp<DeviceType, T>::InferShape() const {
       "The axis should be greater than or equal to 0.");
   auto &in_dims = this->param_.InputX()->dims();
-  // const auto &in_dims = ctx->GetInputDim("X");
   PADDLE_MOBILE_ENFORCE(
       axis <= in_dims.size(),
       "The axis should be less than or equal to input tensor's rank.");
   const auto &out_dims = GetOutputShape(axis, in_dims);
   this->param_.Out()->Resize(in_dims);
-  // todo supprot lodtensor
-  // if (in_dims[0] == out_dims[0]) {
-  //   // Only pass LoD when the first dimension of output and Input(X)
-  //   // are the same.
-  //   ctx->ShareLoD("X", "Out");
-  // }
 }

 }  // namespace operators
 }  // namespace paddle_mobile

 namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
 REGISTER_OPERATOR_CPU(flatten, ops::FlattenOp);
+REGISTER_OPERATOR_CPU(flatten2, ops::Flatten2Op);
 #endif
-#ifdef PADDLE_MOBILE_FPGA
-#endif
-#endif
+#endif  // FLATTEN_OP
@@ -25,6 +25,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
+inline std::vector<int32_t> GetOutputShape(const int axis,
                                            const framework::DDim &in_dims) {
   int64_t outer = 1, inner = 1;
@@ -40,7 +41,6 @@ inline std::vector<int32_t> GetOutputShape(const int axis,
   out_shape[1] = static_cast<int>(inner);
   return out_shape;
 }
-using paddle_mobile::framework::Tensor;

 template <typename DeviceType, typename T>
 class FlattenOp : public framework::OperatorWithKernel<
@@ -56,6 +56,15 @@ class FlattenOp : public framework::OperatorWithKernel<
   void InferShape() const override;
 };

+template <typename DeviceType, typename T>
+class Flatten2Op : public FlattenOp<DeviceType, T> {
+ public:
+  Flatten2Op(const std::string &type, const VariableNameMap &inputs,
+             const VariableNameMap &outputs,
+             const framework::AttributeMap &attrs, framework::Scope *scope)
+      : FlattenOp<DeviceType, T>(type, inputs, outputs, attrs, scope) {}
+};
+
 }  // namespace operators
 }  // namespace paddle_mobile
......
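GetOutputShape above folds every dimension before axis into out_shape[0] and the rest into out_shape[1]; for example, axis = 2 on an input of shape [2, 3, 4, 5] yields [6, 20]. A standalone sketch of the same computation over a plain dimension vector instead of framework::DDim:

#include <cstddef>
#include <cstdint>
#include <vector>

// Collapse dims [0, axis) into out[0] and dims [axis, rank) into out[1].
std::vector<int32_t> FlattenShape(const std::vector<int64_t> &in_dims,
                                  int axis) {
  int64_t outer = 1, inner = 1;
  for (std::size_t i = 0; i < in_dims.size(); ++i) {
    if (static_cast<int>(i) < axis) {
      outer *= in_dims[i];
    } else {
      inner *= in_dims[i];
    }
  }
  return {static_cast<int32_t>(outer), static_cast<int32_t>(inner)};
}
// FlattenShape({2, 3, 4, 5}, 2) returns {6, 20}.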
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ASSIGN_VALUE_OP
#include "operators/kernel/assign_value_kernel.h"
#include "framework/data_type.h"
namespace paddle_mobile {
namespace operators {
struct AssignValueOpFunctor {
framework::LoDTensor* output_;
const std::vector<int> shape_;
const std::vector<int> int32_values_;
const std::vector<float> fp32_values_;
AssignValueOpFunctor(framework::LoDTensor* output,
const std::vector<int>& shape,
const std::vector<float>& fp32_values,
const std::vector<int>& int32_values)
: output_(output),
shape_(shape),
int32_values_(int32_values),
fp32_values_(fp32_values) {}
template <typename T>
inline void apply() const {
PADDLE_MOBILE_THROW_EXCEPTION("Assign value: not supported data type.");
}
};
template <>
inline void AssignValueOpFunctor::apply<int>() const {
framework::TensorFromVector<int>(int32_values_, output_);
output_->Resize(framework::make_ddim(shape_));
}
template <>
inline void AssignValueOpFunctor::apply<float>() const {
framework::TensorFromVector<float>(fp32_values_, output_);
output_->Resize(framework::make_ddim(shape_));
}
template <>
bool AssignValueKernel<CPU, float>::Init(AssignValueParam<CPU>* param) {
return true;
}
template <>
void AssignValueKernel<CPU, float>::Compute(
const AssignValueParam<CPU>& param) {
framework::VisitDataType(
framework::ToDataType(param.dtype_),
AssignValueOpFunctor(param.output_, param.shape_, param.fp32_values_,
param.int32_values_));
}
} // namespace operators
} // namespace paddle_mobile
#endif // ASSIGN_VALUE_OP
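The Compute above is a visitor dispatch: framework::VisitDataType reads the runtime dtype attribute and invokes the matching AssignValueOpFunctor::apply<T>(), so the int32 and fp32 payloads share one code path. A minimal sketch of how such a dispatcher can be built, assuming only the two dtypes this op accepts (the real framework::VisitDataType covers more types):

#include <stdexcept>

// Simplified dtype tags; the framework uses protobuf VarType values.
enum class DataType { INT32, FP32 };

// Call visitor.apply<T>() with T chosen from the runtime dtype.
template <typename Visitor>
void VisitDataTypeSketch(DataType dtype, const Visitor &visitor) {
  switch (dtype) {
    case DataType::INT32:
      visitor.template apply<int>();
      break;
    case DataType::FP32:
      visitor.template apply<float>();
      break;
    default:
      throw std::runtime_error("Assign value: not supported data type.");
  }
}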
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ASSIGN_VALUE_OP
#pragma once
#include <vector>
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype>
class AssignValueParam : public OpParam {
public:
AssignValueParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
Scope *scope)
: OpParam(inputs, outputs, attrs, scope) {
output_ = GET_VAR_AS_LOD_TENSOR("Out", outputs, *scope);
shape_ = OpParam::GetAttr<std::vector<int>>("shape", attrs);
fp32_values_ = OpParam::GetAttr<std::vector<float>>("fp32_values", attrs);
int32_values_ = OpParam::GetAttr<std::vector<int>>("int32_values", attrs);
dtype_ = OpParam::GetAttr<int>("dtype", attrs);
}
public:
framework::LoDTensor *output_;
std::vector<int> shape_;
std::vector<float> fp32_values_;
std::vector<int> int32_values_;
int dtype_;
};
DECLARE_KERNEL(AssignValue, AssignValueParam);
} // namespace operators
} // namespace paddle_mobile
#endif // ASSIGN_VALUE_OP
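AssignValueParam reads everything from attributes rather than from input variables. An illustrative payload for assigning a 2x3 float tensor, with field names matching the GetAttr calls above (the dtype value is an assumption based on Paddle's VarType numbering, not confirmed by this diff):

#include <vector>

// Hypothetical attribute values for one assign_value op instance.
struct AssignValueAttrs {
  std::vector<int> shape{2, 3};
  std::vector<float> fp32_values{0.f, 1.f, 2.f, 3.f, 4.f, 5.f};
  std::vector<int> int32_values;  // unused when dtype selects fp32
  int dtype = 5;                  // assumed: FP32 in Paddle's VarType proto
};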
@@ -22,9 +22,6 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {

-#define GET_VAR_AS_LOD_TENSOR(name, name_dict, scope) \
-  OpParam::GetVarValue<framework::LoDTensor>(name, name_dict, scope)
-
 template <typename Dtype>
 class BeamSearchParam : public OpParam {
  public:
......
@@ -22,9 +22,6 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {

-#define GET_VAR_AS_LOD_TENSOR(name, name_dict, scope) \
-  OpParam::GetVarValue<framework::LoDTensor>(name, name_dict, scope)
-
 template <typename Dtype>
 class OnehotParam : public OpParam {
  public:
......
@@ -434,6 +434,12 @@ class OpParam {
   }
 };

+#define GET_VAR_AS_TENSOR(name, name_dict, scope) \
+  OpParam::GetVarValue<framework::Tensor>(name, name_dict, scope)
+
+#define GET_VAR_AS_LOD_TENSOR(name, name_dict, scope) \
+  OpParam::GetVarValue<framework::LoDTensor>(name, name_dict, scope)
+
 template <typename Dtype>
 class ConvParam : public OpParam {
   typedef typename DtypeTensorTrait<Dtype>::gtype GType;
......
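The beam_search and one_hot hunks above delete their private copies of GET_VAR_AS_LOD_TENSOR; this hunk re-homes it (plus a Tensor variant) in op_param.h so every param class shares one definition. Expansion is purely textual, e.g. in AssignValueParam's constructor:

// output_ = GET_VAR_AS_LOD_TENSOR("Out", outputs, *scope);
// expands to:
// output_ = OpParam::GetVarValue<framework::LoDTensor>("Out", outputs, *scope);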
@@ -320,6 +320,7 @@ if(NOT FOUND_MATCH)
     set(BEAM_SEARCH_DECODE_OP ON)
     set(PAD2D_OP ON)
     set(ONE_HOT_OP ON)
+    set(ASSIGN_VALUE_OP ON)
 endif()

 # option(BATCHNORM_OP "" ON)
@@ -646,3 +647,6 @@ endif()
 if (ONE_HOT_OP)
     add_definitions(-DONE_HOT_OP)
 endif()
+if (ASSIGN_VALUE_OP)
+    add_definitions(-DASSIGN_VALUE_OP)
+endif()
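The CMake flag and the preprocessor guard work as a pair: setting ASSIGN_VALUE_OP ON makes the build pass -DASSIGN_VALUE_OP, and each assign_value source file is wrapped in the matching #ifdef, so a disabled op compiles to an empty translation unit. The guard pattern in isolation:

#ifdef ASSIGN_VALUE_OP
// op declaration, kernel, and REGISTER_OPERATOR_CPU are compiled in
int assign_value_compiled_in() { return 1; }
#endif
// without -DASSIGN_VALUE_OP this file contributes nothing to the binary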