Commit 3f442466 authored by hjchen2

improve program optimize, add assign value and flatten2 op

Parent 8eb2ae94
@@ -152,6 +152,7 @@ LOAD_OP1(prelu, CPU);
 #endif
 #ifdef FLATTEN_OP
 LOAD_OP1(flatten, CPU);
+LOAD_OP1(flatten2, CPU);
 #endif
 #ifdef FUSION_CONVBNADDRELU_OP
 LOAD_OP3(fusion_conv_bn_add_relu, CPU, GPU_CL, FPGA);
@@ -331,3 +332,6 @@ LOAD_OP1(pad2d, CPU);
 #ifdef ONE_HOT_OP
 LOAD_OP1(one_hot, CPU);
 #endif
+#ifdef ASSIGN_VALUE_OP
+LOAD_OP1(assign_value, CPU);
+#endif
@@ -14,6 +14,7 @@ limitations under the License. */
 #include "framework/program/program-optimize/program_optimize.h"
 #include <algorithm>
+#include <utility>
 #include "framework/program/program-optimize/fusion_op_register.h"

 namespace paddle_mobile {
@@ -22,7 +23,6 @@ namespace framework {
 std::shared_ptr<ProgramDesc> ProgramOptimize::FusionOptimize(
     std::shared_ptr<ProgramDesc> ori_des, bool add_split) {
-  // ProgramDesc *optimize_program = new ProgramDesc(*ori_des);
   std::shared_ptr<ProgramDesc> optimize_program =
       std::make_shared<ProgramDesc>(*ori_des);
   current_block_ = optimize_program->Blocks().size();
@@ -35,51 +35,35 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FusionOptimize(
           std::pair<std::shared_ptr<Node>,
                     std::unordered_map<std::string, std::shared_ptr<Node>>>>>
         type_map;
-    std::unordered_map<std::string, bool> output_has;
     std::vector<std::shared_ptr<Node>> nodes;
     std::shared_ptr<Node> begin_node;
     auto block = optimize_program->Block(i);
-    // DLOG << " ops size: " << block->Ops().size();
     for (int j = 0; j < block->Ops().size(); ++j) {
       auto op = block->Ops()[j];
-      auto op_type = op->Type();
-      if (op_input_output_key.find(op->Type()) == op_input_output_key.end()) {
-        LOG(kLOG_ERROR) << "has not support op return null "
-                        << " op type: " << op->Type();
-        return nullptr;
-      }
       std::shared_ptr<Node> node = std::make_shared<Node>(op);
-      nodes.push_back(node);
-      //
-      type_map[op->Type()].push_back({node, output_nodes});
       if (j == 0) {
         begin_node = node;
       }
-      auto input_keys = op_input_output_key.at(op->Type()).first;
-      for (auto input_key : input_keys) {
-        auto op_inputs = op->Input(input_key);
-        for (int l = 0; l < op_inputs.size(); ++l) {
-          std::string input_key = op_inputs[l];
-          if (output_nodes.find(input_key) != output_nodes.end()) {
-            auto input_node = output_nodes[input_key];
+      const std::string op_type = op->Type();
+      nodes.push_back(node);
+      type_map[op_type].push_back({node, output_nodes});
+      const VariableNameMap &op_inputs = op->GetInputs();
+      const VariableNameMap &op_outpus = op->GetOutputs();
+      for (const auto &input : op_inputs) {
+        for (const auto &input_name : input.second) {
+          if (output_nodes.find(input_name) != output_nodes.end()) {
+            auto input_node = output_nodes[input_name];
             *input_node > node;
           }
         }
       }
-      auto output_keys = op_input_output_key.at(op_type).second;
-      for (auto output_key : output_keys) {
-        auto op_outputs = op->Output(output_key);
-        for (int k = 0; k < op_outputs.size(); ++k) {
-          output_nodes[op_outputs[k]] = node;
+      for (const auto &output : op_outpus) {
+        for (const auto &output_name : output.second) {
+          output_nodes[output_name] = node;
         }
       }
     }
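Note on the rewritten loop above: it builds the fusion graph directly from each op's VariableNameMap instead of consulting the old op_input_output_key table. Every output variable name is recorded as being produced by the current node, and any input whose name already has a producer gets an edge from that producer (the `*input_node > node` overload). A minimal self-contained sketch of this def-use linking, with simplified stand-ins for the framework's Node and VariableNameMap types:

#include <map>
#include <string>
#include <vector>

// Simplified stand-ins for framework::Node and VariableNameMap.
struct Node {
  std::vector<Node *> outputs_;  // consumers of this node's outputs
};
using VariableNameMap = std::map<std::string, std::vector<std::string>>;

// Record this op's outputs and connect its inputs to earlier producers.
void LinkNode(const VariableNameMap &op_inputs,
              const VariableNameMap &op_outputs, Node *node,
              std::map<std::string, Node *> *output_nodes) {
  for (const auto &input : op_inputs) {
    for (const auto &name : input.second) {
      auto it = output_nodes->find(name);
      if (it != output_nodes->end()) {
        it->second->outputs_.push_back(node);  // producer -> consumer edge
      }
    }
  }
  for (const auto &output : op_outputs) {
    for (const auto &name : output.second) {
      (*output_nodes)[name] = node;  // the latest producer wins
    }
  }
}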
@@ -97,14 +81,13 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FusionOptimize(
         auto depth = matcher->BeginNode().Depth();
         auto sub_node = match_node->To(depth);
         // DLOG << " sub node: " << *sub_node;
         if (*sub_node == matcher->BeginNode()) {
           bool can_folder = true;
           auto relationship_map = sub_node->Relationship();
           for (auto to_check : matcher->NeedCheck()) {
-            // if (node_has)
             auto nodes = (*sub_node)[to_check.first];
             for (auto node : nodes) {
               auto inputs_to_check =
@@ -126,13 +109,8 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FusionOptimize(
             continue;
           }
-          // DLOG << " match success " << " fusion node: \n" <<
-          // matcher->BeginNode() << "\nsub node: \n" << *sub_node;
-          // DLOG << "match node\n"<< *match_node;
           std::vector<std::shared_ptr<Node>> removed_nodes;
           matcher->FolderNodes(match_node.get(), &removed_nodes);
           for (int k = removed_nodes.size() - 1; k >= 0; --k) {
             auto removed_node = removed_nodes[k];
             auto removed_ite =
@@ -170,12 +148,12 @@ void ProgramOptimize::GenerateOps(
     Node *current_node) {
   if (current_node->inputs_.size() > 1 &&
       input_node != current_node->inputs_.back()) {
-    DLOG << " current type " << current_node->type_;
+    DLOG << " current type " << current_node->Type();
     DLOG << " inputs size of current node > 0 ";
     for (int i = 0; i < current_node->inputs_.size(); ++i) {
-      DLOG << " input i: " << current_node->inputs_[i]->type_;
+      DLOG << " input i: " << current_node->inputs_[i]->Type();
     }
     return;
@@ -201,9 +179,11 @@ void ProgramOptimize::GenerateOps(
   }

   bool can_add_split = false;
+  const auto current_desc = current_node->OpDescOfNode();
+  const VariableNameMap &current_op_inputs = current_desc->GetInputs();
+  const VariableNameMap &current_op_outputs = current_desc->GetOutputs();
   // Supported only when the current node has multiple outputs and the
   // op_desc_ of the current node has exactly one output key
-  if (current_node->outputs_.size() > 1 &&
-      op_input_output_key[current_node->op_desc_->type_].second.size() == 1) {
+  if (current_node->outputs_.size() > 1 && current_op_outputs.size() == 1) {
     can_add_split = true;

     // Iterate over the output nodes of the current node
@@ -217,18 +197,15 @@ void ProgramOptimize::GenerateOps(
       // The OpDesc associated with the node
       std::shared_ptr<framework::OpDesc> &op_desc = output->op_desc_;
       // Get the inputs key and outputs key of this op
-      auto inputs_and_outputs = op_input_output_key[op_desc->type_];
+      const VariableNameMap &op_inputs = op_desc->GetInputs();
+      const VariableNameMap &op_outputs = op_desc->GetOutputs();
      // Check whether this op is known,
      // and that its input key size and output key size are both 1
-      if (op_input_output_key.find(op_desc->type_) !=
-              op_input_output_key.end() &&
-          inputs_and_outputs.first.size() == 1 &&
-          inputs_and_outputs.second.size() == 1) {
-        auto inputs_of_output = op_desc->Input(inputs_and_outputs.first[0]);
-        auto outputs_of_output = op_desc->Output(inputs_and_outputs.second[0]);
+      if (op_outputs.size() == 1 && op_inputs.size() == 1) {
+        auto inputs_of_output = op_inputs.begin()->second;
+        auto outputs_of_output = op_outputs.begin()->second;

       // If no input and output share a name, splitting is supported
       for (int i = 0; i < inputs_of_output.size(); ++i) {
@@ -243,7 +220,7 @@ void ProgramOptimize::GenerateOps(
         }
       }
     } else {  // If the model contains an unknown op, adding split is not supported
-      DLOG << "Cannot find this op type: " << output->op_desc_->type_;
+      DLOG << "Cannot find this op type: " << output->op_desc_->Type();
       can_add_split = false;
     }
   }
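With op_input_output_key gone, the split check above reduces to: the candidate consumer has exactly one input key and one output key, and no variable name appears on both sides. A compact sketch of that collision test, under the same simplified VariableNameMap alias as the earlier sketch:

#include <algorithm>
#include <map>
#include <string>
#include <vector>

using VariableNameMap = std::map<std::string, std::vector<std::string>>;

// True if a split node may be inserted before this op: one input key,
// one output key, and no name shared between inputs and outputs.
bool SplitSupported(const VariableNameMap &op_inputs,
                    const VariableNameMap &op_outputs) {
  if (op_inputs.size() != 1 || op_outputs.size() != 1) return false;
  const auto &ins = op_inputs.begin()->second;
  const auto &outs = op_outputs.begin()->second;
  for (const auto &in : ins) {
    if (std::find(outs.begin(), outs.end(), in) != outs.end()) return false;
  }
  return true;
}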
@@ -312,9 +289,6 @@ void ProgramOptimize::GenerateOps(
 void ProgramOptimize::GenerateOps(
     std::vector<std::shared_ptr<framework::OpDesc>> *op_descs, Node *begin_node,
     bool can_add_split) {
-  //  std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
-  //  Node *input_node, Node *current_node, bool adding_thread, int
-  //  thread_num
   if (can_add_split) {
     this->GenerateOps(op_descs, begin_node, begin_node, false, -1, nullptr);
   } else {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ASSIGN_VALUE_OP
#include "operators/assign_value_op.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void AssignValueOp<Dtype, T>::InferShape() const {
const auto &shape = this->param_.shape_;
this->param_.output_->Resize(framework::make_ddim(shape));
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(assign_value, ops::AssignValueOp);
#endif
#endif // ASSIGN_VALUE_OP
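InferShape above sizes Out purely from the shape attribute, while the kernel later copies in a flat value vector; the two are consistent only when the shape's element count matches the vector length. A small sanity check the op itself does not perform, shown only to make that relationship concrete:

#include <cstddef>
#include <numeric>
#include <vector>

// numel(shape) must equal the number of values the kernel will copy;
// e.g. shape = {2, 3} pairs with six fp32_values.
bool ShapeMatchesValues(const std::vector<int> &shape, std::size_t num_values) {
  const long long numel =
      std::accumulate(shape.begin(), shape.end(), 1LL,
                      [](long long acc, int d) { return acc * d; });
  return numel == static_cast<long long>(num_values);
}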
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ASSIGN_VALUE_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/assign_value_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
DECLARE_OPERATOR(AssignValue, AssignValueParam, AssignValueKernel);
} // namespace operators
} // namespace paddle_mobile
#endif
@@ -31,29 +31,22 @@ void FlattenOp<DeviceType, T>::InferShape() const {
       "The axis should be greater than or equal to 0.");
   auto &in_dims = this->param_.InputX()->dims();
-  // const auto &in_dims = ctx->GetInputDim("X");
   PADDLE_MOBILE_ENFORCE(
       axis <= in_dims.size(),
       "The axis should be less than or equal to input tensor's rank.");
   const auto &out_dims = GetOutputShape(axis, in_dims);
   this->param_.Out()->Resize(in_dims);
-  // todo supprot lodtensor
-  // if (in_dims[0] == out_dims[0]) {
-  //   // Only pass LoD when the first dimension of output and Input(X)
-  //   // are the same.
-  //   ctx->ShareLoD("X", "Out");
-  // }
 }

 }  // namespace operators
 }  // namespace paddle_mobile

 namespace ops = paddle_mobile::operators;
 #ifdef PADDLE_MOBILE_CPU
 REGISTER_OPERATOR_CPU(flatten, ops::FlattenOp);
+REGISTER_OPERATOR_CPU(flatten2, ops::Flatten2Op);
 #endif
-#ifdef PADDLE_MOBILE_FPGA
-#endif
-#endif
+#endif  // FLATTEN_OP
@@ -25,6 +25,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
+inline std::vector<int32_t> GetOutputShape(const int axis,
                                            const framework::DDim &in_dims) {
   int64_t outer = 1, inner = 1;
@@ -40,7 +41,6 @@ inline std::vector<int32_t> GetOutputShape(const int axis,
   out_shape[1] = static_cast<int>(inner);
   return out_shape;
 }
-using paddle_mobile::framework::Tensor;

 template <typename DeviceType, typename T>
 class FlattenOp : public framework::OperatorWithKernel<
@@ -56,6 +56,15 @@ class FlattenOp : public framework::OperatorWithKernel<
   void InferShape() const override;
 };

+template <typename DeviceType, typename T>
+class Flatten2Op : public FlattenOp<DeviceType, T> {
+ public:
+  Flatten2Op(const std::string &type, const VariableNameMap &inputs,
+             const VariableNameMap &outputs,
+             const framework::AttributeMap &attrs, framework::Scope *scope)
+      : FlattenOp<DeviceType, T>(type, inputs, outputs, attrs, scope) {}
+};
+
 }  // namespace operators
 }  // namespace paddle_mobile
......
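GetOutputShape above folds every dimension before axis into out_shape[0] and the rest into out_shape[1]; for example, axis = 2 on an input of shape [2, 3, 4, 5] yields [6, 20]. A standalone sketch of the same computation over a plain dimension vector instead of framework::DDim:

#include <cstddef>
#include <cstdint>
#include <vector>

// Collapse dims [0, axis) into out[0] and dims [axis, rank) into out[1].
std::vector<int32_t> FlattenShape(const std::vector<int64_t> &in_dims,
                                  int axis) {
  int64_t outer = 1, inner = 1;
  for (std::size_t i = 0; i < in_dims.size(); ++i) {
    if (static_cast<int>(i) < axis) {
      outer *= in_dims[i];
    } else {
      inner *= in_dims[i];
    }
  }
  return {static_cast<int32_t>(outer), static_cast<int32_t>(inner)};
}
// FlattenShape({2, 3, 4, 5}, 2) returns {6, 20}.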
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ASSIGN_VALUE_OP
#include "operators/kernel/assign_value_kernel.h"
#include "framework/data_type.h"
namespace paddle_mobile {
namespace operators {
struct AssignValueOpFunctor {
framework::LoDTensor* output_;
const std::vector<int> shape_;
const std::vector<int> int32_values_;
const std::vector<float> fp32_values_;
AssignValueOpFunctor(framework::LoDTensor* output,
const std::vector<int>& shape,
const std::vector<float>& fp32_values,
const std::vector<int>& int32_values)
: output_(output),
shape_(shape),
int32_values_(int32_values),
fp32_values_(fp32_values) {}
template <typename T>
inline void apply() const {
PADDLE_MOBILE_THROW_EXCEPTION("Assign value: not supported data type.");
}
};
template <>
inline void AssignValueOpFunctor::apply<int>() const {
framework::TensorFromVector<int>(int32_values_, output_);
output_->Resize(framework::make_ddim(shape_));
}
template <>
inline void AssignValueOpFunctor::apply<float>() const {
framework::TensorFromVector<float>(fp32_values_, output_);
output_->Resize(framework::make_ddim(shape_));
}
template <>
bool AssignValueKernel<CPU, float>::Init(AssignValueParam<CPU>* param) {
return true;
}
template <>
void AssignValueKernel<CPU, float>::Compute(
const AssignValueParam<CPU>& param) {
framework::VisitDataType(
framework::ToDataType(param.dtype_),
AssignValueOpFunctor(param.output_, param.shape_, param.fp32_values_,
param.int32_values_));
}
} // namespace operators
} // namespace paddle_mobile
#endif // ASSIGN_VALUE_OP
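The Compute above is a visitor dispatch: framework::VisitDataType reads the runtime dtype attribute and invokes the matching AssignValueOpFunctor::apply<T>(), so the int32 and fp32 payloads share one code path. A minimal sketch of how such a dispatcher can be built, assuming only the two dtypes this op accepts (the real framework::VisitDataType covers more types):

#include <stdexcept>

// Simplified dtype tags; the framework uses protobuf VarType values.
enum class DataType { INT32, FP32 };

// Call visitor.apply<T>() with T chosen from the runtime dtype.
template <typename Visitor>
void VisitDataTypeSketch(DataType dtype, const Visitor &visitor) {
  switch (dtype) {
    case DataType::INT32:
      visitor.template apply<int>();
      break;
    case DataType::FP32:
      visitor.template apply<float>();
      break;
    default:
      throw std::runtime_error("Assign value: not supported data type.");
  }
}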
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ASSIGN_VALUE_OP
#pragma once
#include <vector>
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype>
class AssignValueParam : public OpParam {
public:
AssignValueParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
Scope *scope)
: OpParam(inputs, outputs, attrs, scope) {
output_ = GET_VAR_AS_LOD_TENSOR("Out", outputs, *scope);
shape_ = OpParam::GetAttr<std::vector<int>>("shape", attrs);
fp32_values_ = OpParam::GetAttr<std::vector<float>>("fp32_values", attrs);
int32_values_ = OpParam::GetAttr<std::vector<int>>("int32_values", attrs);
dtype_ = OpParam::GetAttr<int>("dtype", attrs);
}
public:
framework::LoDTensor *output_;
std::vector<int> shape_;
std::vector<float> fp32_values_;
std::vector<int> int32_values_;
int dtype_;
};
DECLARE_KERNEL(AssignValue, AssignValueParam);
} // namespace operators
} // namespace paddle_mobile
#endif // ASSIGN_VALUE_OP
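AssignValueParam reads everything from attributes rather than from input variables. An illustrative payload for assigning a 2x3 float tensor, with field names matching the GetAttr calls above (the dtype value is an assumption based on Paddle's VarType numbering, not confirmed by this diff):

#include <vector>

// Hypothetical attribute values for one assign_value op instance.
struct AssignValueAttrs {
  std::vector<int> shape{2, 3};
  std::vector<float> fp32_values{0.f, 1.f, 2.f, 3.f, 4.f, 5.f};
  std::vector<int> int32_values;  // unused when dtype selects fp32
  int dtype = 5;                  // assumed: FP32 in Paddle's VarType proto
};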
@@ -22,9 +22,6 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {

-#define GET_VAR_AS_LOD_TENSOR(name, name_dict, scope) \
-  OpParam::GetVarValue<framework::LoDTensor>(name, name_dict, scope)
-
 template <typename Dtype>
 class BeamSearchParam : public OpParam {
  public:
......
@@ -22,9 +22,6 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {

-#define GET_VAR_AS_LOD_TENSOR(name, name_dict, scope) \
-  OpParam::GetVarValue<framework::LoDTensor>(name, name_dict, scope)
-
 template <typename Dtype>
 class OnehotParam : public OpParam {
  public:
......
@@ -434,6 +434,12 @@ class OpParam {
   }
 };

+#define GET_VAR_AS_TENSOR(name, name_dict, scope) \
+  OpParam::GetVarValue<framework::Tensor>(name, name_dict, scope)
+
+#define GET_VAR_AS_LOD_TENSOR(name, name_dict, scope) \
+  OpParam::GetVarValue<framework::LoDTensor>(name, name_dict, scope)
+
 template <typename Dtype>
 class ConvParam : public OpParam {
   typedef typename DtypeTensorTrait<Dtype>::gtype GType;
......
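The beam_search and one_hot hunks above delete their private copies of GET_VAR_AS_LOD_TENSOR; this hunk re-homes it (plus a Tensor variant) in op_param.h so every param class shares one definition. Expansion is purely textual, e.g. in AssignValueParam's constructor:

// output_ = GET_VAR_AS_LOD_TENSOR("Out", outputs, *scope);
// expands to:
// output_ = OpParam::GetVarValue<framework::LoDTensor>("Out", outputs, *scope);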
@@ -320,6 +320,7 @@ if(NOT FOUND_MATCH)
     set(BEAM_SEARCH_DECODE_OP ON)
     set(PAD2D_OP ON)
     set(ONE_HOT_OP ON)
+    set(ASSIGN_VALUE_OP ON)
 endif()

 # option(BATCHNORM_OP "" ON)
@@ -646,3 +647,6 @@ endif()
 if (ONE_HOT_OP)
     add_definitions(-DONE_HOT_OP)
 endif()
+if (ASSIGN_VALUE_OP)
+    add_definitions(-DASSIGN_VALUE_OP)
+endif()
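The CMake flag and the preprocessor guard work as a pair: setting ASSIGN_VALUE_OP ON makes the build pass -DASSIGN_VALUE_OP, and each assign_value source file is wrapped in the matching #ifdef, so a disabled op compiles to an empty translation unit. The guard pattern in isolation:

#ifdef ASSIGN_VALUE_OP
// op declaration, kernel, and REGISTER_OPERATOR_CPU are compiled in
int assign_value_compiled_in() { return 1; }
#endif
// without -DASSIGN_VALUE_OP this file contributes nothing to the binary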