Commit f79ad065 authored by: H hjchen2

Parse sub-block attribute in while op correctly

Parent f87b9db5
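In short: the while op's "sub_block" attribute used to reach the op as a bare int block index, which the kernel could not resolve on its own. This commit makes ProgramDesc resolve BLOCK and BLOCKS proto attributes into BlockDesc pointers right after parsing, so the op receives the block itself. A minimal before/after sketch, taken from the while-op param change further down:

    // before: only the raw index was stored in the attribute map
    int sub_block = OpParam::GetAttr<int>("sub_block", attrs);
    // after: the attribute already holds the resolved block
    framework::BlockDesc *sub_block =
        OpParam::GetAttr<framework::BlockDesc *>("sub_block", attrs);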
@@ -4,7 +4,7 @@ option(USE_OPENMP "build with openmp support" ON)
option(USE_EXCEPTION "build with exception" ON)
option(WITH_LOGGING "print logging for debug" ON)
option(WITH_SYMBOL "build with all symbols" ON) # turn off if use jni or ios io
option(WITH_PROFILE "print op profile for debug" OFF)
option(WITH_PROFILE "print op profile for debug" ON)
option(WITH_TEST "build with unit tests" ON)
# select the platform to build
@@ -23,7 +23,7 @@ file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
include_directories(src/)
set(CMAKE_CXX_FLAGS "-O3 -s -DNDEBUG ${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "-O3 -s -DNDEBUG ${CMAKE_CXX_FLAGS} -Wno-attributes")
if(IS_IOS)
set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc \
-std=gnu++11 -stdlib=libc++ -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}")
@@ -91,7 +91,6 @@ class Attribute {
break;
}
case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK: {
attr.Set<int>(attr_desc->block_idx);
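// ^ removed by this commit: the raw block_idx is no longer stored as an int
// attribute; ProgramDesc's constructor now resolves it to a BlockDesc *
// (see program_desc.cpp below)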
break;
}
default:
@@ -139,6 +138,12 @@ class Attribute {
return vistor(attr.variant_.Get<vector<bool>>());
} else if (attr.variant_.TypeId() == typeid(int64_t).hash_code()) {
return vistor(attr.variant_.Get<int64_t>());
} else if (attr.variant_.TypeId() ==
typeid(framework::BlockDesc *).hash_code()) {
return vistor(attr.variant_.Get<framework::BlockDesc *>());
} else if (attr.variant_.TypeId() ==
typeid(vector<framework::BlockDesc *>).hash_code()) {
return vistor(attr.variant_.Get<vector<framework::BlockDesc *>>());
} else {
PADDLE_MOBILE_THROW_EXCEPTION("type not supported");
}
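The two new branches above teach Attribute's visitor dispatch about block-typed values; together with the Set<T> calls added to OpDesc below, this is what lets an op param read a resolved block back out. A hedged sketch for the vector case (the attribute name "sub_blocks" is hypothetical; OpParam::GetAttr is the accessor the while op uses below):

    std::vector<framework::BlockDesc *> bodies =
        OpParam::GetAttr<std::vector<framework::BlockDesc *>>("sub_blocks", attrs);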
@@ -57,25 +57,22 @@ Executor<Device, T>::Executor(const Program<Device> &program,
PADDLE_MOBILE_ENFORCE(program_desc_ != nullptr,
"program_desc_ should not be nullptr");
const auto &blocks = program_desc_->Blocks();
ops_of_block_.resize(blocks.size());
for (int i = 0; i < blocks.size(); ++i) {
std::shared_ptr<BlockDesc> block_desc = blocks[i];
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
for (int j = 0; j < ops.size(); ++j) {
std::shared_ptr<OpDesc> op_desc = ops[j];
DLOG << "create op: " << op_desc->Type();
auto op_handler = OpRegistry<Device>::CreateOp(
op_desc->Type(), op_desc->GetInputs(), op_desc->GetOutputs(),
op_desc->GetAttrMap(), program_.scope);
// infer shape to reshape inputs and outputs before predict,
// but in lod mode shapes still need to be inferred at run time
if (!lod_mode) {
op_handler->InferShape();
}
ops_of_block_[i].push_back(op_handler);
std::shared_ptr<BlockDesc> block_desc = blocks[0];
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
for (int j = 0; j < ops.size(); ++j) {
std::shared_ptr<OpDesc> op_desc = ops[j];
DLOG << "create op: " << op_desc->Type();
auto op_handler = OpRegistry<Device>::CreateOp(
op_desc->Type(), op_desc->GetInputs(), op_desc->GetOutputs(),
op_desc->GetAttrMap(), program_.scope);
// infer shape to reshape inputs and outputs before predict,
// but in lod mode shapes still need to be inferred at run time
if (!lod_mode) {
op_handler->InferShape();
}
ops_of_block0_.push_back(op_handler);
}
if (program_.combined) {
@@ -85,12 +82,9 @@ Executor<Device, T>::Executor(const Program<Device> &program,
}
int count = 0;
for (int block_id = 0; block_id < ops_of_block_.size(); ++block_id) {
for (auto &op_handler : ops_of_block_[block_id]) {
DLOG << "Initialize op[" << count++ << "]: " << op_handler->Type();
op_handler->Init();
ops_list_.push_back(op_handler);
}
for (auto &op_handler : ops_of_block0_) {
DLOG << "Initialize op[" << count++ << "]: " << op_handler->Type();
op_handler->Init();
}
}
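A hedged reading of why the 2-D op table can go away: once each op carries its own sub-block as an attribute, the executor only ever drives block 0 directly, and the flattened ops_list_ becomes redundant with it. The member change (shown in executor.h below) collapses both into one vector:

    // before: one op list per block, plus a flattened copy
    std::vector<std::vector<OperatorBasePtr>> ops_of_block_;
    std::vector<OperatorBasePtr> ops_list_;
    // after: a single list for block 0
    std::vector<std::shared_ptr<OperatorBase<Device>>> ops_of_block0_;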
@@ -373,41 +367,40 @@ void Executor<Device, T>::SetInput(const LoDTensor &input,
template <typename Device, typename T>
PMStatus Executor<Device, T>::Predict() {
#ifdef PADDLE_MOBILE_PROFILE
std::vector<ProfInfo> profile(ops_list_.size());
std::vector<ProfInfo> profile(ops_of_block0_.size());
struct timespec ts;
int op_index = 0;
#endif
for (auto &block : ops_of_block_) {
for (auto &op_handler : block) {
for (auto &op_handler : ops_of_block0_) {
#ifdef PADDLE_MOBILE_PROFILE
clock_gettime(CLOCK_MONOTONIC, &ts);
profile[op_index].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
clock_gettime(CLOCK_MONOTONIC, &ts);
profile[op_index].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
#endif
if (lod_mode_) {
op_handler->InferShape();
}
op_handler->Run();
if (lod_mode_) {
op_handler->InferShape();
}
op_handler->Run();
#ifdef PADDLE_MOBILE_PROFILE
clock_gettime(CLOCK_MONOTONIC, &ts);
profile[op_index].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
++op_index;
clock_gettime(CLOCK_MONOTONIC, &ts);
profile[op_index].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
++op_index;
#endif
}
}
#ifdef PADDLE_MOBILE_PROFILE
std::unordered_map<std::string, uint64_t> _tp;
for (int i = 0; i < profile.size(); i++) {
const auto &pInfo = profile[i];
uint64_t timeCost = pInfo.runEnd - pInfo.runBegin;
if (ops_list_[i]->Type() == "conv2d" ||
ops_list_[i]->Type() == "depthwise_conv2d") {
auto inputs = ops_list_[i]->Inputs();
if (ops_of_block0_[i]->Type() == "conv2d" ||
ops_of_block0_[i]->Type() == "depthwise_conv2d") {
auto inputs = ops_of_block0_[i]->Inputs();
auto *filter =
GetVarValue<LoDTensor>("Filter", inputs, *(program_.scope));
int kernel_size = filter->dims()[2];
_tp[ops_list_[i]->Type() + "_" + std::to_string(kernel_size)] += timeCost;
_tp[ops_of_block0_[i]->Type() + "_" + std::to_string(kernel_size)] +=
timeCost;
} else {
_tp[ops_list_[i]->Type()] += timeCost;
_tp[ops_of_block0_[i]->Type()] += timeCost;
}
}
printf("====================[ profile ]======================\n");
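The _tp map keys conv ops by kernel size so that, e.g., 3x3 and 1x1 convolutions are reported separately in the profile table. A small worked example of the key scheme (values illustrative):

    // a conv2d whose filter->dims()[2] == 3 accumulates under "conv2d_3":
    _tp["conv2d_3"] += timeCost;
    // any non-conv op accumulates under its plain type name:
    _tp["relu"] += timeCost;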
@@ -459,7 +452,7 @@ void Executor<Device, T>::FeedData(const Tensor &t) {
template <typename Device, typename T>
std::shared_ptr<Tensor> Executor<Device, T>::FetchResult(int id) {
auto &ops = ops_of_block_[0];
auto &ops = ops_of_block0_;
PADDLE_MOBILE_ENFORCE(id < (int)ops.size(), "Index out of range");
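// a negative id fetches the result of the last op in block 0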
auto op = id < 0 ? ops[ops.size() - 1] : ops[id];
@@ -473,7 +466,7 @@ std::shared_ptr<Tensor> Executor<Device, T>::FetchResult(int id) {
template <typename Device, typename T>
void Executor<Device, T>::Predict_From_To(int start, int end) {
auto &ops = ops_of_block_[0];
auto &ops = ops_of_block0_;
end = end < 0 ? static_cast<int>(ops.size()) : end;
PADDLE_MOBILE_ENFORCE(start >= 0 && start < end && end <= ops.size(),
"start or end parameter is wrong");
@@ -78,10 +78,7 @@ class Executor {
PaddleMobileConfigInternal config_;
Program<Device> program_;
std::shared_ptr<ProgramDesc> program_desc_;
typedef std::shared_ptr<OperatorBase<Device>> OperatorBasePtr;
std::vector<std::vector<OperatorBasePtr>> ops_of_block_;
// operators list
std::vector<OperatorBasePtr> ops_list_;
std::vector<std::shared_ptr<OperatorBase<Device>>> ops_of_block0_;
// for super resolution
DDim input_dim_last_;
@@ -42,9 +42,15 @@ OpDesc::OpDesc(PaddleMobile__Framework__Proto__OpDesc *desc) {
PaddleMobile__Framework__Proto__OpDesc__Attr *attr = desc->attrs[k];
std::string attr_name(attr->name);
attrs_[attr_name] = Attribute::GetAttrValue(attr);
proto_attrs_.push_back(*attr);
}
}
const std::vector<PaddleMobile__Framework__Proto__OpDesc__Attr>
&OpDesc::GetProtoAttr() const {
return proto_attrs_;
}
const std::vector<std::string> &OpDesc::Input(const std::string &name) const {
return inputs_.find(name)->second;
}
@@ -58,6 +64,15 @@ Attribute OpDesc::GetAttr(const std::string &name) const {
return it->second;
}
void OpDesc::SetBlockAttr(const std::string &name, BlockDesc *block) {
this->attrs_[name].Set<BlockDesc *>(block);
}
void OpDesc::SetBlocksAttr(const std::string &name,
std::vector<BlockDesc *> blocks) {
this->attrs_[name].Set<std::vector<BlockDesc *>>(blocks);
}
std::unordered_map<std::string, Attribute> &OpDesc::GetAttrMap() {
return attrs_;
}
@@ -29,11 +29,13 @@ class OpDesc {
friend class ProgramOptimize;
friend class FusionOpMatcher;
friend class Node;
explicit OpDesc(PaddleMobile__Framework__Proto__OpDesc *op_desc);
OpDesc(const OpDesc &op_desc) : type_(op_desc.type_) {
this->inputs_ = op_desc.inputs_;
this->outputs_ = op_desc.outputs_;
this->attrs_ = op_desc.attrs_;
this->proto_attrs_ = op_desc.proto_attrs_;
}
OpDesc() {}
@@ -41,6 +43,12 @@ class OpDesc {
const std::vector<std::string> &Output(const std::string &name) const;
Attribute GetAttr(const std::string &name) const;
const std::vector<PaddleMobile__Framework__Proto__OpDesc__Attr>
&GetProtoAttr() const;
void SetBlockAttr(const std::string &name, BlockDesc *block);
void SetBlocksAttr(const std::string &name, std::vector<BlockDesc *> block);
VariableNameMap &GetInputs() { return inputs_; }
VariableNameMap &GetOutputs() { return outputs_; }
@@ -60,6 +68,7 @@ class OpDesc {
VariableNameMap inputs_;
VariableNameMap outputs_;
AttributeMap attrs_;
std::vector<PaddleMobile__Framework__Proto__OpDesc__Attr> proto_attrs_;
};
Print &operator<<(Print &printer, const OpDesc &op_desc);
@@ -15,8 +15,8 @@ limitations under the License. */
#include <string>
#include <vector>
#include "framework/program/program_desc.h"
#include "framework/program/tensor_desc.h"
#include "program_desc.h"
namespace paddle_mobile {
namespace framework {
@@ -25,6 +25,25 @@ ProgramDesc::ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc) {
for (int i = 0; i < desc->n_blocks; ++i) {
blocks_.emplace_back(std::make_shared<BlockDesc>(desc->blocks[i]));
}
for (auto &block : blocks_) {
for (auto op : block->Ops()) {
for (const auto &attr : op->GetProtoAttr()) {
if (attr.type == PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK) {
size_t blk_idx = attr.block_idx;
op->SetBlockAttr(attr.name, this->MutableBlock(blk_idx));
} else if (attr.type ==
PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCKS) {
size_t n_blocks_idx = attr.n_blocks_idx;
int32_t *blks_idx = attr.blocks_idx;
std::vector<BlockDesc *> block_descs;
for (size_t i = 0; i < n_blocks_idx; ++i) {
block_descs.push_back(this->MutableBlock(blks_idx[i]));
}
op->SetBlocksAttr(attr.name, block_descs);
}
}
}
}
}
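// Worked example of the resolution loop above: if block 0 holds a while op
// whose proto attr list contains {name: "sub_block", type: BLOCK, block_idx: 1},
// the second pass rewrites that attribute in place, in effect:
//   op->SetBlockAttr("sub_block", this->MutableBlock(1));
// so attrs_["sub_block"] now holds blocks_[1].get() rather than the index 1.
// The resolution has to run as a separate pass after all blocks are parsed,
// because an op may reference a block that appears later in the program.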
void ProgramDesc::Description(std::string header) {
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "common/types.h"
@@ -31,6 +32,14 @@ class ProgramDesc {
std::shared_ptr<BlockDesc> Block(size_t idx);
BlockDesc *MutableBlock(size_t idx) {
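// note: idx is size_t, so 'idx == -1' compares against size_t(-1), i.e.
// SIZE_MAX; -1 acts as a "no block" sentinel that yields nullptr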
if (idx == -1) {
return nullptr;
} else {
return blocks_[idx].get();
}
}
const std::vector<std::shared_ptr<BlockDesc>> &Blocks() { return blocks_; }
ProgramDesc(const ProgramDesc &program_desc) {
for (auto &block : program_desc.blocks_) {
@@ -29,12 +29,12 @@ class WhileParam : public OpParam {
: inputs_(inputs), outputs_(outputs), scope_(scope) {
cond_ =
OpParam::GetVarValue<framework::LoDTensor>("Condition", inputs, scope);
sub_block_ = OpParam::GetAttr<int>("sub_block", attrs);
sub_block_ = OpParam::GetAttr<framework::BlockDesc *>("sub_block", attrs);
}
public:
framework::LoDTensor *cond_;
int sub_block_;
const framework::BlockDesc *sub_block_;
const VariableNameMap inputs_;
const VariableNameMap outputs_;
const Scope scope_;
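Design note: storing the BlockDesc * instead of the old int index means the while kernel no longer needs the ProgramDesc at run time; everything it needs travels in the attribute map. With the old int attribute, the kernel would in effect have required a lookup it had no way to perform (hypothetical sketch; WhileParam's constructor receives only inputs, outputs, attrs, and scope):

    // before (impossible from inside the kernel, no ProgramDesc in reach):
    framework::BlockDesc *body = program_desc->MutableBlock(sub_block_);
    // after: sub_block_ already is the body block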
@@ -57,31 +57,27 @@ class Executor4Test : public Executor<DeviceType> {
LOG(paddle_mobile::LogLevel::kLOG_ERROR) << "program_desc_ == nullptr";
}
const std::vector<std::shared_ptr<BlockDesc>> blocks =
const std::vector<std::shared_ptr<BlockDesc>> &blocks =
this->program_desc_->Blocks();
for (int block_id = 0; block_id < blocks.size(); ++block_id) {
std::vector<std::shared_ptr<OpDesc>> ops = blocks[block_id]->Ops();
for (int i = 0; i < ops.size(); ++i) {
auto op = ops[i];
if (op->Type() == op_type) {
DLOG << "匹配到: " << op->Type();
/// test first meeting op in program
std::shared_ptr<paddle_mobile::framework::OperatorBase<DeviceType>>
op_ptr =
paddle_mobile::framework::OpRegistry<DeviceType>::CreateOp(
op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), this->program_.scope);
this->ops_of_block_[block_id].push_back(op_ptr);
break;
}
std::vector<std::shared_ptr<OpDesc>> ops = blocks[0]->Ops();
for (int i = 0; i < ops.size(); ++i) {
auto op = ops[i];
if (op->Type() == op_type) {
DLOG << "匹配到: " << op->Type();
/// test first meeting op in program
std::shared_ptr<paddle_mobile::framework::OperatorBase<DeviceType>>
op_ptr = paddle_mobile::framework::OpRegistry<DeviceType>::CreateOp(
op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
this->program_.scope);
this->ops_of_block0_.push_back(op_ptr);
break;
}
}
this->InitMemory();
for (const auto &ops : this->ops_of_block_) {
for (const auto &op : ops) {
op->Init();
}
for (const auto &op : this->ops_of_block0_) {
op->Init();
}
}
@@ -114,10 +110,8 @@ class Executor4Test : public Executor<DeviceType> {
output_tensor_sptrs[i].reset(output_tensors[i]);
}
for (auto &ops : this->ops_of_block_) {
for (auto &op : ops) {
op->Run();
}
for (auto &op : this->ops_of_block0_) {
op->Run();
}
return output_tensor_sptrs;
@@ -134,11 +128,10 @@ class Executor4Test : public Executor<DeviceType> {
auto *output_tensor = con_output->GetMutable<LoDTensor>();
output_tensor->mutable_data<float>(dDim);
for (auto &ops : this->ops_of_block_) {
for (auto &op : ops) {
op->Run();
}
for (auto &op : this->ops_of_block0_) {
op->Run();
}
return std::make_shared<paddle_mobile::framework::Tensor>(
paddle_mobile::framework::Tensor(*output_tensor));
}
@@ -64,7 +64,7 @@ function check_ndk() {
}
function build_android_armv7_cpu_only() {
rm -rf ../build/armeabi-v7a
# rm -rf ../build/armeabi-v7a
cmake .. \
-B"../build/armeabi-v7a" \
-DANDROID_ABI="armeabi-v7a with NEON" \
@@ -74,6 +74,7 @@ function build_android_armv7_cpu_only() {
-DANDROID_STL=c++_static \
-DANDROID=true \
-DWITH_LOGGING=OFF \
-DCPU=ON \
-DGPU_MALI=OFF \
-DGPU_CL=OFF \
-DFPGA=OFF
@@ -93,6 +94,7 @@ function build_android_armv7_gpu() {
-DANDROID_STL=c++_static \
-DANDROID=true \
-DWITH_LOGGING=OFF \
-DCPU=ON \
-DGPU_MALI=ON \
-DGPU_CL=ON \
-DFPGA=OFF
@@ -112,6 +114,7 @@ function build_android_armv8_cpu_only() {
-DANDROID_STL=c++_static \
-DANDROID=true \
-DWITH_LOGGING=OFF \
-DCPU=ON \
-DGPU_MALI=OFF \
-DGPU_CL=OFF \
-DFPGA=OFF
@@ -131,6 +134,7 @@ function build_android_armv8_gpu() {
-DANDROID_STL=c++_static \
-DANDROID=true \
-DWITH_LOGGING=OFF \
-DCPU=ON \
-DGPU_MALI=ON \
-DGPU_CL=ON \
-DFPGA=OFF
@@ -149,6 +153,7 @@ function build_ios_armv8_cpu_only() {
-DIOS_ARCH="${IOS_ARCH}" \
-DIS_IOS=true \
-DUSE_OPENMP=OFF \
-DCPU=ON \
-DGPU_MALI=OFF \
-DGPU_CL=OFF \
-DFPGA=OFF
@@ -167,6 +172,7 @@ function build_ios_armv8_gpu() {
-DIOS_ARCH="${IOS_ARCH}" \
-DIS_IOS=true \
-DUSE_OPENMP=OFF \
-DCPU=ON \
-DGPU_MALI=OFF \
-DGPU_CL=ON \
-DFPGA=OFF
@@ -181,6 +187,7 @@ function build_linux_armv7_cpu_only() {
-B"../build/armv7_linux" \
-DCMAKE_BUILD_TYPE="MinSizeRel" \
-DCMAKE_TOOLCHAIN_FILE="./tools/toolchains/arm-linux-gnueabihf.cmake" \
-DCPU=ON \
-DGPU_MALI=OFF \
-DGPU_CL=OFF \
-DFPGA=OFF
@@ -195,6 +202,7 @@ function build_linux_armv7_gpu() {
-B"../build/armv7_linux" \
-DCMAKE_BUILD_TYPE="MinSizeRel" \
-DCMAKE_TOOLCHAIN_FILE="./tools/toolchains/arm-linux-gnueabihf.cmake" \
-DCPU=ON \
-DGPU_MALI=ON \
-DGPU_CL=ON \
-DFPGA=OFF