Commit f79ad065 authored by: H hjchen2

Parse sub-block attribute in while op correctly

Parent f87b9db5
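In short: the while op's "sub_block" attribute used to reach the op as a bare int block index, which the kernel could not resolve on its own. This commit makes ProgramDesc resolve BLOCK and BLOCKS proto attributes into BlockDesc pointers right after parsing, so the op receives the block itself. A minimal before/after sketch, taken from the while-op param change further down:

    // before: only the raw index was stored in the attribute map
    int sub_block = OpParam::GetAttr<int>("sub_block", attrs);
    // after: the attribute already holds the resolved block
    framework::BlockDesc *sub_block =
        OpParam::GetAttr<framework::BlockDesc *>("sub_block", attrs);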
@@ -4,7 +4,7 @@ option(USE_OPENMP "build with openmp support" ON)
option(USE_EXCEPTION "build with exception" ON)
option(WITH_LOGGING "print logging for debug" ON)
option(WITH_SYMBOL "build with all symbols" ON) # turn off if use jni or ios io
option(WITH_PROFILE "print op profile for debug" OFF)
option(WITH_PROFILE "print op profile for debug" ON)
option(WITH_TEST "build with unit tests" ON)
# select the platform to build
@@ -23,7 +23,7 @@ file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
include_directories(src/)
set(CMAKE_CXX_FLAGS "-O3 -s -DNDEBUG ${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "-O3 -s -DNDEBUG ${CMAKE_CXX_FLAGS} -Wno-attributes")
if(IS_IOS)
set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc \
-std=gnu++11 -stdlib=libc++ -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}")
@@ -91,7 +91,6 @@ class Attribute {
break;
}
case PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK: {
attr.Set<int>(attr_desc->block_idx);
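// ^ removed by this commit: the raw block_idx is no longer stored as an int
// attribute; ProgramDesc's constructor now resolves it to a BlockDesc *
// (see program_desc.cpp below)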
break;
}
default:
@@ -139,6 +138,12 @@ class Attribute {
return vistor(attr.variant_.Get<vector<bool>>());
} else if (attr.variant_.TypeId() == typeid(int64_t).hash_code()) {
return vistor(attr.variant_.Get<int64_t>());
} else if (attr.variant_.TypeId() ==
typeid(framework::BlockDesc *).hash_code()) {
return vistor(attr.variant_.Get<framework::BlockDesc *>());
} else if (attr.variant_.TypeId() ==
typeid(vector<framework::BlockDesc *>).hash_code()) {
return vistor(attr.variant_.Get<vector<framework::BlockDesc *>>());
} else {
PADDLE_MOBILE_THROW_EXCEPTION("type not supported");
}
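The two new branches above teach Attribute's visitor dispatch about block-typed values; together with the Set<T> calls added to OpDesc below, this is what lets an op param read a resolved block back out. A hedged sketch for the vector case (the attribute name "sub_blocks" is hypothetical; OpParam::GetAttr is the accessor the while op uses below):

    std::vector<framework::BlockDesc *> bodies =
        OpParam::GetAttr<std::vector<framework::BlockDesc *>>("sub_blocks", attrs);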
@@ -57,25 +57,22 @@ Executor<Device, T>::Executor(const Program<Device> &program,
PADDLE_MOBILE_ENFORCE(program_desc_ != nullptr,
"program_desc_ should not be nullptr");
const auto &blocks = program_desc_->Blocks();
ops_of_block_.resize(blocks.size());
for (int i = 0; i < blocks.size(); ++i) {
std::shared_ptr<BlockDesc> block_desc = blocks[i];
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
for (int j = 0; j < ops.size(); ++j) {
std::shared_ptr<OpDesc> op_desc = ops[j];
DLOG << "create op: " << op_desc->Type();
auto op_handler = OpRegistry<Device>::CreateOp(
op_desc->Type(), op_desc->GetInputs(), op_desc->GetOutputs(),
op_desc->GetAttrMap(), program_.scope);
// infer shape to reshape inputs and outputs before predict,
// but in lod mode shapes still need to be inferred at run time
if (!lod_mode) {
op_handler->InferShape();
}
ops_of_block_[i].push_back(op_handler);
std::shared_ptr<BlockDesc> block_desc = blocks[0];
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
for (int j = 0; j < ops.size(); ++j) {
std::shared_ptr<OpDesc> op_desc = ops[j];
DLOG << "create op: " << op_desc->Type();
auto op_handler = OpRegistry<Device>::CreateOp(
op_desc->Type(), op_desc->GetInputs(), op_desc->GetOutputs(),
op_desc->GetAttrMap(), program_.scope);
// infer shape to reshape inputs and outputs before predict,
// but in lod mode shapes still need to be inferred at run time
if (!lod_mode) {
op_handler->InferShape();
}
ops_of_block0_.push_back(op_handler);
}
if (program_.combined) {
@@ -85,12 +82,9 @@ Executor<Device, T>::Executor(const Program<Device> &program,
}
int count = 0;
for (int block_id = 0; block_id < ops_of_block_.size(); ++block_id) {
for (auto &op_handler : ops_of_block_[block_id]) {
DLOG << "Initialize op[" << count++ << "]: " << op_handler->Type();
op_handler->Init();
ops_list_.push_back(op_handler);
}
for (auto &op_handler : ops_of_block0_) {
DLOG << "Initialize op[" << count++ << "]: " << op_handler->Type();
op_handler->Init();
}
}
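A hedged reading of why the 2-D op table can go away: once each op carries its own sub-block as an attribute, the executor only ever drives block 0 directly, and the flattened ops_list_ becomes redundant with it. The member change (shown in executor.h below) collapses both into one vector:

    // before: one op list per block, plus a flattened copy
    std::vector<std::vector<OperatorBasePtr>> ops_of_block_;
    std::vector<OperatorBasePtr> ops_list_;
    // after: a single list for block 0
    std::vector<std::shared_ptr<OperatorBase<Device>>> ops_of_block0_;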
@@ -373,41 +367,40 @@ void Executor<Device, T>::SetInput(const LoDTensor &input,
template <typename Device, typename T>
PMStatus Executor<Device, T>::Predict() {
#ifdef PADDLE_MOBILE_PROFILE
std::vector<ProfInfo> profile(ops_list_.size());
std::vector<ProfInfo> profile(ops_of_block0_.size());
struct timespec ts;
int op_index = 0;
#endif
for (auto &block : ops_of_block_) {
for (auto &op_handler : block) {
for (auto &op_handler : ops_of_block0_) {
#ifdef PADDLE_MOBILE_PROFILE
clock_gettime(CLOCK_MONOTONIC, &ts);
profile[op_index].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
clock_gettime(CLOCK_MONOTONIC, &ts);
profile[op_index].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
#endif
if (lod_mode_) {
op_handler->InferShape();
}
op_handler->Run();
if (lod_mode_) {
op_handler->InferShape();
}
op_handler->Run();
#ifdef PADDLE_MOBILE_PROFILE
clock_gettime(CLOCK_MONOTONIC, &ts);
profile[op_index].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
++op_index;
clock_gettime(CLOCK_MONOTONIC, &ts);
profile[op_index].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
++op_index;
#endif
}
}
#ifdef PADDLE_MOBILE_PROFILE
std::unordered_map<std::string, uint64_t> _tp;
for (int i = 0; i < profile.size(); i++) {
const auto &pInfo = profile[i];
uint64_t timeCost = pInfo.runEnd - pInfo.runBegin;
if (ops_list_[i]->Type() == "conv2d" ||
ops_list_[i]->Type() == "depthwise_conv2d") {
auto inputs = ops_list_[i]->Inputs();
if (ops_of_block0_[i]->Type() == "conv2d" ||
ops_of_block0_[i]->Type() == "depthwise_conv2d") {
auto inputs = ops_of_block0_[i]->Inputs();
auto *filter =
GetVarValue<LoDTensor>("Filter", inputs, *(program_.scope));
int kernel_size = filter->dims()[2];
_tp[ops_list_[i]->Type() + "_" + std::to_string(kernel_size)] += timeCost;
_tp[ops_of_block0_[i]->Type() + "_" + std::to_string(kernel_size)] +=
timeCost;
} else {
_tp[ops_list_[i]->Type()] += timeCost;
_tp[ops_of_block0_[i]->Type()] += timeCost;
}
}
printf("====================[ profile ]======================\n");
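The _tp map keys conv ops by kernel size so that, e.g., 3x3 and 1x1 convolutions are reported separately in the profile table. A small worked example of the key scheme (values illustrative):

    // a conv2d whose filter->dims()[2] == 3 accumulates under "conv2d_3":
    _tp["conv2d_3"] += timeCost;
    // any non-conv op accumulates under its plain type name:
    _tp["relu"] += timeCost;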
@@ -459,7 +452,7 @@ void Executor<Device, T>::FeedData(const Tensor &t) {
template <typename Device, typename T>
std::shared_ptr<Tensor> Executor<Device, T>::FetchResult(int id) {
auto &ops = ops_of_block_[0];
auto &ops = ops_of_block0_;
PADDLE_MOBILE_ENFORCE(id < (int)ops.size(), "Index out of range");
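// a negative id fetches the result of the last op in block 0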
auto op = id < 0 ? ops[ops.size() - 1] : ops[id];
@@ -473,7 +466,7 @@ std::shared_ptr<Tensor> Executor<Device, T>::FetchResult(int id) {
template <typename Device, typename T>
void Executor<Device, T>::Predict_From_To(int start, int end) {
auto &ops = ops_of_block_[0];
auto &ops = ops_of_block0_;
end = end < 0 ? static_cast<int>(ops.size()) : end;
PADDLE_MOBILE_ENFORCE(start >= 0 && start < end && end <= ops.size(),
"start or end parameter is wrong");
@@ -78,10 +78,7 @@ class Executor {
PaddleMobileConfigInternal config_;
Program<Device> program_;
std::shared_ptr<ProgramDesc> program_desc_;
typedef std::shared_ptr<OperatorBase<Device>> OperatorBasePtr;
std::vector<std::vector<OperatorBasePtr>> ops_of_block_;
// operators list
std::vector<OperatorBasePtr> ops_list_;
std::vector<std::shared_ptr<OperatorBase<Device>>> ops_of_block0_;
// for super resolution
DDim input_dim_last_;
@@ -42,9 +42,15 @@ OpDesc::OpDesc(PaddleMobile__Framework__Proto__OpDesc *desc) {
PaddleMobile__Framework__Proto__OpDesc__Attr *attr = desc->attrs[k];
std::string attr_name(attr->name);
attrs_[attr_name] = Attribute::GetAttrValue(attr);
proto_attrs_.push_back(*attr);
}
}
const std::vector<PaddleMobile__Framework__Proto__OpDesc__Attr>
&OpDesc::GetProtoAttr() const {
return proto_attrs_;
}
const std::vector<std::string> &OpDesc::Input(const std::string &name) const {
return inputs_.find(name)->second;
}
@@ -58,6 +64,15 @@ Attribute OpDesc::GetAttr(const std::string &name) const {
return it->second;
}
void OpDesc::SetBlockAttr(const std::string &name, BlockDesc *block) {
this->attrs_[name].Set<BlockDesc *>(block);
}
void OpDesc::SetBlocksAttr(const std::string &name,
std::vector<BlockDesc *> blocks) {
this->attrs_[name].Set<std::vector<BlockDesc *>>(blocks);
}
std::unordered_map<std::string, Attribute> &OpDesc::GetAttrMap() {
return attrs_;
}
@@ -29,11 +29,13 @@ class OpDesc {
friend class ProgramOptimize;
friend class FusionOpMatcher;
friend class Node;
explicit OpDesc(PaddleMobile__Framework__Proto__OpDesc *op_desc);
OpDesc(const OpDesc &op_desc) : type_(op_desc.type_) {
this->inputs_ = op_desc.inputs_;
this->outputs_ = op_desc.outputs_;
this->attrs_ = op_desc.attrs_;
this->proto_attrs_ = op_desc.proto_attrs_;
}
OpDesc() {}
@@ -41,6 +43,12 @@ class OpDesc {
const std::vector<std::string> &Output(const std::string &name) const;
Attribute GetAttr(const std::string &name) const;
const std::vector<PaddleMobile__Framework__Proto__OpDesc__Attr>
&GetProtoAttr() const;
void SetBlockAttr(const std::string &name, BlockDesc *block);
void SetBlocksAttr(const std::string &name, std::vector<BlockDesc *> block);
VariableNameMap &GetInputs() { return inputs_; }
VariableNameMap &GetOutputs() { return outputs_; }
@@ -60,6 +68,7 @@ class OpDesc {
VariableNameMap inputs_;
VariableNameMap outputs_;
AttributeMap attrs_;
std::vector<PaddleMobile__Framework__Proto__OpDesc__Attr> proto_attrs_;
};
Print &operator<<(Print &printer, const OpDesc &op_desc);
@@ -15,8 +15,8 @@ limitations under the License. */
#include <string>
#include <vector>
#include "framework/program/program_desc.h"
#include "framework/program/tensor_desc.h"
#include "program_desc.h"
namespace paddle_mobile {
namespace framework {
@@ -25,6 +25,25 @@ ProgramDesc::ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc) {
for (int i = 0; i < desc->n_blocks; ++i) {
blocks_.emplace_back(std::make_shared<BlockDesc>(desc->blocks[i]));
}
for (auto &block : blocks_) {
for (auto op : block->Ops()) {
for (const auto &attr : op->GetProtoAttr()) {
if (attr.type == PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCK) {
size_t blk_idx = attr.block_idx;
op->SetBlockAttr(attr.name, this->MutableBlock(blk_idx));
} else if (attr.type ==
PADDLE_MOBILE__FRAMEWORK__PROTO__ATTR_TYPE__BLOCKS) {
size_t n_blocks_idx = attr.n_blocks_idx;
int32_t *blks_idx = attr.blocks_idx;
std::vector<BlockDesc *> block_descs;
for (size_t i = 0; i < n_blocks_idx; ++i) {
block_descs.push_back(this->MutableBlock(blks_idx[i]));
}
op->SetBlocksAttr(attr.name, block_descs);
}
}
}
}
}
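// Worked example of the resolution loop above: if block 0 holds a while op
// whose proto attr list contains {name: "sub_block", type: BLOCK, block_idx: 1},
// the second pass rewrites that attribute in place, in effect:
//   op->SetBlockAttr("sub_block", this->MutableBlock(1));
// so attrs_["sub_block"] now holds blocks_[1].get() rather than the index 1.
// The resolution has to run as a separate pass after all blocks are parsed,
// because an op may reference a block that appears later in the program.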
void ProgramDesc::Description(std::string header) {
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "common/types.h"
@@ -31,6 +32,14 @@ class ProgramDesc {
std::shared_ptr<BlockDesc> Block(size_t idx);
BlockDesc *MutableBlock(size_t idx) {
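// note: idx is size_t, so 'idx == -1' compares against size_t(-1), i.e.
// SIZE_MAX; -1 acts as a "no block" sentinel that yields nullptr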
if (idx == -1) {
return nullptr;
} else {
return blocks_[idx].get();
}
}
const std::vector<std::shared_ptr<BlockDesc>> &Blocks() { return blocks_; }
ProgramDesc(const ProgramDesc &program_desc) {
for (auto &block : program_desc.blocks_) {
@@ -29,12 +29,12 @@ class WhileParam : public OpParam {
: inputs_(inputs), outputs_(outputs), scope_(scope) {
cond_ =
OpParam::GetVarValue<framework::LoDTensor>("Condition", inputs, scope);
sub_block_ = OpParam::GetAttr<int>("sub_block", attrs);
sub_block_ = OpParam::GetAttr<framework::BlockDesc *>("sub_block", attrs);
}
public:
framework::LoDTensor *cond_;
int sub_block_;
const framework::BlockDesc *sub_block_;
const VariableNameMap inputs_;
const VariableNameMap outputs_;
const Scope scope_;
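Design note: storing the BlockDesc * instead of the old int index means the while kernel no longer needs the ProgramDesc at run time; everything it needs travels in the attribute map. With the old int attribute, the kernel would in effect have required a lookup it had no way to perform (hypothetical sketch; WhileParam's constructor receives only inputs, outputs, attrs, and scope):

    // before (impossible from inside the kernel, no ProgramDesc in reach):
    framework::BlockDesc *body = program_desc->MutableBlock(sub_block_);
    // after: sub_block_ already is the body block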
@@ -57,31 +57,27 @@ class Executor4Test : public Executor<DeviceType> {
LOG(paddle_mobile::LogLevel::kLOG_ERROR) << "program_desc_ == nullptr";
}
const std::vector<std::shared_ptr<BlockDesc>> blocks =
const std::vector<std::shared_ptr<BlockDesc>> &blocks =
this->program_desc_->Blocks();
for (int block_id = 0; block_id < blocks.size(); ++block_id) {
std::vector<std::shared_ptr<OpDesc>> ops = blocks[block_id]->Ops();
for (int i = 0; i < ops.size(); ++i) {
auto op = ops[i];
if (op->Type() == op_type) {
DLOG << "匹配到: " << op->Type();
/// test first meeting op in program
std::shared_ptr<paddle_mobile::framework::OperatorBase<DeviceType>>
op_ptr =
paddle_mobile::framework::OpRegistry<DeviceType>::CreateOp(
op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), this->program_.scope);
this->ops_of_block_[block_id].push_back(op_ptr);
break;
}
std::vector<std::shared_ptr<OpDesc>> ops = blocks[0]->Ops();
for (int i = 0; i < ops.size(); ++i) {
auto op = ops[i];
if (op->Type() == op_type) {
DLOG << "匹配到: " << op->Type();
/// test first meeting op in program
std::shared_ptr<paddle_mobile::framework::OperatorBase<DeviceType>>
op_ptr = paddle_mobile::framework::OpRegistry<DeviceType>::CreateOp(
op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
this->program_.scope);
this->ops_of_block0_.push_back(op_ptr);
break;
}
}
this->InitMemory();
for (const auto &ops : this->ops_of_block_) {
for (const auto &op : ops) {
op->Init();
}
for (const auto &op : this->ops_of_block0_) {
op->Init();
}
}
@@ -114,10 +110,8 @@ class Executor4Test : public Executor<DeviceType> {
output_tensor_sptrs[i].reset(output_tensors[i]);
}
for (auto &ops : this->ops_of_block_) {
for (auto &op : ops) {
op->Run();
}
for (auto &op : this->ops_of_block0_) {
op->Run();
}
return output_tensor_sptrs;
@@ -134,11 +128,10 @@ class Executor4Test : public Executor<DeviceType> {
auto *output_tensor = con_output->GetMutable<LoDTensor>();
output_tensor->mutable_data<float>(dDim);
for (auto &ops : this->ops_of_block_) {
for (auto &op : ops) {
op->Run();
}
for (auto &op : this->ops_of_block0_) {
op->Run();
}
return std::make_shared<paddle_mobile::framework::Tensor>(
paddle_mobile::framework::Tensor(*output_tensor));
}
@@ -64,7 +64,7 @@ function check_ndk() {
}
function build_android_armv7_cpu_only() {
rm -rf ../build/armeabi-v7a
# rm -rf ../build/armeabi-v7a
cmake .. \
-B"../build/armeabi-v7a" \
-DANDROID_ABI="armeabi-v7a with NEON" \
@@ -74,6 +74,7 @@ function build_android_armv7_cpu_only() {
-DANDROID_STL=c++_static \
-DANDROID=true \
-DWITH_LOGGING=OFF \
-DCPU=ON \
-DGPU_MALI=OFF \
-DGPU_CL=OFF \
-DFPGA=OFF
@@ -93,6 +94,7 @@ function build_android_armv7_gpu() {
-DANDROID_STL=c++_static \
-DANDROID=true \
-DWITH_LOGGING=OFF \
-DCPU=ON \
-DGPU_MALI=ON \
-DGPU_CL=ON \
-DFPGA=OFF
@@ -112,6 +114,7 @@ function build_android_armv8_cpu_only() {
-DANDROID_STL=c++_static \
-DANDROID=true \
-DWITH_LOGGING=OFF \
-DCPU=ON \
-DGPU_MALI=OFF \
-DGPU_CL=OFF \
-DFPGA=OFF
@@ -131,6 +134,7 @@ function build_android_armv8_gpu() {
-DANDROID_STL=c++_static \
-DANDROID=true \
-DWITH_LOGGING=OFF \
-DCPU=ON \
-DGPU_MALI=ON \
-DGPU_CL=ON \
-DFPGA=OFF
@@ -149,6 +153,7 @@ function build_ios_armv8_cpu_only() {
-DIOS_ARCH="${IOS_ARCH}" \
-DIS_IOS=true \
-DUSE_OPENMP=OFF \
-DCPU=ON \
-DGPU_MALI=OFF \
-DGPU_CL=OFF \
-DFPGA=OFF
@@ -167,6 +172,7 @@ function build_ios_armv8_gpu() {
-DIOS_ARCH="${IOS_ARCH}" \
-DIS_IOS=true \
-DUSE_OPENMP=OFF \
-DCPU=ON \
-DGPU_MALI=OFF \
-DGPU_CL=ON \
-DFPGA=OFF
@@ -181,6 +187,7 @@ function build_linux_armv7_cpu_only() {
-B"../build/armv7_linux" \
-DCMAKE_BUILD_TYPE="MinSizeRel" \
-DCMAKE_TOOLCHAIN_FILE="./tools/toolchains/arm-linux-gnueabihf.cmake" \
-DCPU=ON \
-DGPU_MALI=OFF \
-DGPU_CL=OFF \
-DFPGA=OFF
@@ -195,6 +202,7 @@ function build_linux_armv7_gpu() {
-B"../build/armv7_linux" \
-DCMAKE_BUILD_TYPE="MinSizeRel" \
-DCMAKE_TOOLCHAIN_FILE="./tools/toolchains/arm-linux-gnueabihf.cmake" \
-DCPU=ON \
-DGPU_MALI=ON \
-DGPU_CL=ON \
-DFPGA=OFF