Commit 63df3091 authored by hjchen2

improve program optimize, add assign_value and flatten2 ops

Parent 65a8b291
@@ -152,6 +152,7 @@ LOAD_OP1(prelu, CPU);
#endif
#ifdef FLATTEN_OP
LOAD_OP1(flatten, CPU);
LOAD_OP1(flatten2, CPU);
#endif
#ifdef FUSION_CONVBNADDRELU_OP
LOAD_OP3(fusion_conv_bn_add_relu, CPU, GPU_CL, FPGA);
@@ -331,3 +332,6 @@ LOAD_OP1(pad2d, CPU);
#ifdef ONE_HOT_OP
LOAD_OP1(one_hot, CPU);
#endif
#ifdef ASSIGN_VALUE_OP
LOAD_OP1(assign_value, CPU);
#endif
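The two hunks above register the new ops with the conditional op loader. As a rough sketch of what this pattern accomplishes, assuming a simple name-to-loader registry (the real LOAD_OP1 macro and registry are defined elsewhere in paddle-mobile; every name below is a hypothetical illustration, not the project's API):

#include <functional>
#include <map>
#include <string>

// Hypothetical stand-in for the op loader registry.
static std::map<std::string, std::function<void()>> g_op_loaders;

// Hypothetical LOAD_OP1-style macro: record a loader that references the
// op's registrar so the linker keeps the otherwise-unreferenced symbol.
#define LOAD_OP1_SKETCH(op, device) \
  g_op_loaders[#op] = []() { /* touch the op##_##device registrar here */ };

void LoadAllOps() {
#ifdef FLATTEN_OP
  LOAD_OP1_SKETCH(flatten, CPU);
  LOAD_OP1_SKETCH(flatten2, CPU);
#endif
#ifdef ASSIGN_VALUE_OP
  LOAD_OP1_SKETCH(assign_value, CPU);
#endif
}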
@@ -14,6 +14,7 @@ limitations under the License. */
#include "framework/program/program-optimize/program_optimize.h"
#include <algorithm>
#include <utility>
#include "framework/program/program-optimize/fusion_op_register.h"
namespace paddle_mobile {
@@ -22,7 +23,6 @@ namespace framework {
std::shared_ptr<ProgramDesc> ProgramOptimize::FusionOptimize(
std::shared_ptr<ProgramDesc> ori_des, bool add_split) {
// ProgramDesc *optimize_program = new ProgramDesc(*ori_des);
std::shared_ptr<ProgramDesc> optimize_program =
std::make_shared<ProgramDesc>(*ori_des);
current_block_ = optimize_program->Blocks().size();
@@ -35,51 +35,35 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FusionOptimize(
std::pair<std::shared_ptr<Node>,
std::unordered_map<std::string, std::shared_ptr<Node>>>>>
type_map;
std::unordered_map<std::string, bool> output_has;
std::vector<std::shared_ptr<Node>> nodes;
std::shared_ptr<Node> begin_node;
auto block = optimize_program->Block(i);
// DLOG << " ops size: " << block->Ops().size();
for (int j = 0; j < block->Ops().size(); ++j) {
auto op = block->Ops()[j];
auto op_type = op->Type();
if (op_input_output_key.find(op->Type()) == op_input_output_key.end()) {
LOG(kLOG_ERROR) << "unsupported op, returning null; op type: " << op->Type();
return nullptr;
}
std::shared_ptr<Node> node = std::make_shared<Node>(op);
nodes.push_back(node);
type_map[op->Type()].push_back({node, output_nodes});
if (j == 0) {
begin_node = node;
}
auto input_keys = op_input_output_key.at(op->Type()).first;
for (auto input_key : input_keys) {
auto op_inputs = op->Input(input_key);
for (int l = 0; l < op_inputs.size(); ++l) {
std::string input_key = op_inputs[l];
if (output_nodes.find(input_key) != output_nodes.end()) {
auto input_node = output_nodes[input_key];
const std::string op_type = op->Type();
nodes.push_back(node);
type_map[op_type].push_back({node, output_nodes});
const VariableNameMap &op_inputs = op->GetInputs();
const VariableNameMap &op_outputs = op->GetOutputs();
for (const auto &input : op_inputs) {
for (const auto &input_name : input.second) {
if (output_nodes.find(input_name) != output_nodes.end()) {
auto input_node = output_nodes[input_name];
*input_node > node;
}
}
}
auto output_keys = op_input_output_key.at(op_type).second;
for (auto output_key : output_keys) {
auto op_outputs = op->Output(output_key);
for (int k = 0; k < op_outputs.size(); ++k) {
output_nodes[op_outputs[k]] = node;
for (const auto &output : op_outputs) {
for (const auto &output_name : output.second) {
output_nodes[output_name] = node;
}
}
}
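The rewritten loop above drops the op_input_output_key table and reads input/output names straight off the op via GetInputs()/GetOutputs(). A condensed, self-contained sketch of that producer-map wiring, assuming VariableNameMap maps a parameter key to variable names as in paddle-mobile (Node simplified to bare edge lists):

#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using VariableNameMap = std::map<std::string, std::vector<std::string>>;

struct Node {
  std::vector<Node *> inputs, outputs;
};

// For each op: wire edges from the producers of its inputs to the new node
// (what "*input_node > node" does above), then record the node as producer
// of all of its outputs, in the same order as the loop above.
std::vector<std::unique_ptr<Node>> BuildGraph(
    const std::vector<std::pair<VariableNameMap, VariableNameMap>> &ops) {
  std::unordered_map<std::string, Node *> producer;  // variable -> last writer
  std::vector<std::unique_ptr<Node>> nodes;
  for (const auto &io : ops) {
    nodes.push_back(std::unique_ptr<Node>(new Node));
    Node *node = nodes.back().get();
    for (const auto &input : io.first) {
      for (const auto &name : input.second) {
        auto it = producer.find(name);
        if (it != producer.end()) {
          it->second->outputs.push_back(node);
          node->inputs.push_back(it->second);
        }
      }
    }
    for (const auto &output : io.second) {
      for (const auto &name : output.second) {
        producer[name] = node;
      }
    }
  }
  return nodes;
}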
@@ -97,14 +81,13 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FusionOptimize(
auto depth = matcher->BeginNode().Depth();
auto sub_node = match_node->To(depth);
// DLOG << " sub node: " << *sub_node;
if (*sub_node == matcher->BeginNode()) {
bool can_folder = true;
auto relationship_map = sub_node->Relationship();
for (auto to_check : matcher->NeedCheck()) {
// if (node_has)
auto nodes = (*sub_node)[to_check.first];
for (auto node : nodes) {
auto inputs_to_check =
@@ -126,13 +109,8 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FusionOptimize(
continue;
}
// DLOG << " match success " << " fusion node: \n" <<
// matcher->BeginNode() << "\nsub node: \n" << *sub_node;
// DLOG << "match node\n"<< *match_node;
std::vector<std::shared_ptr<Node>> removed_nodes;
matcher->FolderNodes(match_node.get(), &removed_nodes);
for (int k = removed_nodes.size() - 1; k >= 0; --k) {
auto removed_node = removed_nodes[k];
auto removed_ite =
@@ -170,12 +148,12 @@ void ProgramOptimize::GenerateOps(
Node *current_node) {
if (current_node->inputs_.size() > 1 &&
input_node != current_node->inputs_.back()) {
DLOG << " current type " << current_node->type_;
DLOG << " current type " << current_node->Type();
DLOG << " inputs size of current node > 1 ";
for (int i = 0; i < current_node->inputs_.size(); ++i) {
DLOG << " input i: " << current_node->inputs_[i]->type_;
DLOG << " input i: " << current_node->inputs_[i]->Type();
}
return;
@@ -201,9 +179,11 @@ void ProgramOptimize::GenerateOps(
}
bool can_add_split = false;
const auto current_desc = current_node->OpDescOfNode();
const VariableNameMap &current_op_inputs = current_desc->GetInputs();
const VariableNameMap &current_op_outputs = current_desc->GetOutputs();
// Supported only when the current node has multiple outputs and the
// corresponding op_desc_ declares exactly one output
if (current_node->outputs_.size() > 1 &&
op_input_output_key[current_node->op_desc_->type_].second.size() == 1) {
if (current_node->outputs_.size() > 1 && current_op_outputs.size() == 1) {
can_add_split = true;
// Traverse the output nodes of the current node
@@ -217,18 +197,15 @@ void ProgramOptimize::GenerateOps(
// The OpDesc associated with this node
std::shared_ptr<framework::OpDesc> &op_desc = output->op_desc_;
// Get this op's input keys and output keys
auto inputs_and_outputs = op_input_output_key[op_desc->type_];
const VariableNameMap &op_inputs = op_desc->GetInputs();
const VariableNameMap &op_outputs = op_desc->GetOutputs();
// Check whether this op type is known,
// and that its input and output key counts are both 1
if (op_input_output_key.find(op_desc->type_) !=
op_input_output_key.end() &&
inputs_and_outputs.first.size() == 1 &&
inputs_and_outputs.second.size() == 1) {
auto inputs_of_output = op_desc->Input(inputs_and_outputs.first[0]);
auto outputs_of_output = op_desc->Output(inputs_and_outputs.second[0]);
if (op_outputs.size() == 1 && op_inputs.size() == 1) {
auto inputs_of_output = op_inputs.begin()->second;
auto outputs_of_output = op_outputs.begin()->second;
// Supported as long as no input shares a name with an output
for (int i = 0; i < inputs_of_output.size(); ++i) {
@@ -243,7 +220,7 @@ void ProgramOptimize::GenerateOps(
}
}
} else {  // If the model contains an unknown op, adding split is not supported
DLOG << "Cannot find this op type: " << output->op_desc_->type_;
DLOG << "Cannot find this op type: " << output->op_desc_->Type();
can_add_split = false;
}
}
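Condensing the checks above: a split is only inserted when every consumer op has exactly one input key and one output key, and no output name aliases an input name (an in-place op). A standalone restatement of that predicate under the same VariableNameMap assumption; an illustration, not the project's API:

#include <map>
#include <string>
#include <vector>

using VariableNameMap = std::map<std::string, std::vector<std::string>>;

// True if a consumer with these inputs/outputs tolerates a split node
// being spliced in front of it.
bool ConsumerAllowsSplit(const VariableNameMap &op_inputs,
                         const VariableNameMap &op_outputs) {
  if (op_inputs.size() != 1 || op_outputs.size() != 1) return false;
  for (const std::string &in : op_inputs.begin()->second) {
    for (const std::string &out : op_outputs.begin()->second) {
      if (in == out) return false;  // in-place op: a split would break it
    }
  }
  return true;
}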
@@ -312,9 +289,6 @@ void ProgramOptimize::GenerateOps(
void ProgramOptimize::GenerateOps(
std::vector<std::shared_ptr<framework::OpDesc>> *op_descs, Node *begin_node,
bool can_add_split) {
// std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
// Node *input_node, Node *current_node, bool adding_thread, int
// thread_num
if (can_add_split) {
this->GenerateOps(op_descs, begin_node, begin_node, false, -1, nullptr);
} else {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ASSIGN_VALUE_OP
#include "operators/assign_value_op.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void AssignValueOp<Dtype, T>::InferShape() const {
const auto &shape = this->param_.shape_;
this->param_.output_->Resize(framework::make_ddim(shape));
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(assign_value, ops::AssignValueOp);
#endif
#endif // ASSIGN_VALUE_OP
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ASSIGN_VALUE_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/assign_value_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
DECLARE_OPERATOR(AssignValue, AssignValueParam, AssignValueKernel);
} // namespace operators
} // namespace paddle_mobile
#endif
@@ -31,29 +31,22 @@ void FlattenOp<DeviceType, T>::InferShape() const {
"The axis should be greater than or equal to 0.");
auto &in_dims = this->param_.InputX()->dims();
// const auto &in_dims = ctx->GetInputDim("X");
PADDLE_MOBILE_ENFORCE(
axis <= in_dims.size(),
"The axis should be less than or equal to input tensor's rank.");
const auto &out_dims = GetOutputShape(axis, in_dims);
this->param_.Out()->Resize(framework::make_ddim(out_dims));
// TODO: support LoDTensor
// if (in_dims[0] == out_dims[0]) {
// // Only pass LoD when the first dimension of output and Input(X)
// // are the same.
// ctx->ShareLoD("X", "Out");
// }
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(flatten, ops::FlattenOp);
#endif
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(flatten2, ops::Flatten2Op);
#endif
#endif
#endif // FLATTEN_OP
@@ -25,6 +25,7 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
inline std::vector<int32_t> GetOutputShape(const int axis,
const framework::DDim &in_dims) {
int64_t outer = 1, inner = 1;
@@ -40,7 +41,6 @@ inline std::vector<int32_t> GetOutputShape(const int axis,
out_shape[1] = static_cast<int>(inner);
return out_shape;
}
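Worked example of GetOutputShape's arithmetic: for in_dims = {2, 3, 4, 5} and axis = 2, outer = 2 * 3 = 6 and inner = 4 * 5 = 20, so the flattened shape is {6, 20}. A self-contained restatement of the same computation:

#include <cassert>
#include <vector>

// Same outer/inner product split as GetOutputShape, over plain ints.
std::vector<int> FlatShape(int axis, const std::vector<int> &dims) {
  int outer = 1, inner = 1;
  for (int i = 0; i < static_cast<int>(dims.size()); ++i) {
    if (i < axis) {
      outer *= dims[i];
    } else {
      inner *= dims[i];
    }
  }
  return {outer, inner};
}

int main() {
  std::vector<int> s = FlatShape(2, {2, 3, 4, 5});
  assert(s[0] == 6 && s[1] == 20);
  return 0;
}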
using paddle_mobile::framework::Tensor;
template <typename DeviceType, typename T>
class FlattenOp : public framework::OperatorWithKernel<
@@ -56,6 +56,15 @@ class FlattenOp : public framework::OperatorWithKernel<
void InferShape() const override;
};
template <typename DeviceType, typename T>
class Flatten2Op : public FlattenOp<DeviceType, T> {
public:
Flatten2Op(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs, framework::Scope *scope)
: FlattenOp<DeviceType, T>(type, inputs, outputs, attrs, scope) {}
};
} // namespace operators
} // namespace paddle_mobile
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ASSIGN_VALUE_OP
#include "operators/kernel/assign_value_kernel.h"
#include "framework/data_type.h"
namespace paddle_mobile {
namespace operators {
struct AssignValueOpFunctor {
framework::LoDTensor* output_;
const std::vector<int> shape_;
const std::vector<int> int32_values_;
const std::vector<float> fp32_values_;
AssignValueOpFunctor(framework::LoDTensor* output,
const std::vector<int>& shape,
const std::vector<float>& fp32_values,
const std::vector<int>& int32_values)
: output_(output),
shape_(shape),
int32_values_(int32_values),
fp32_values_(fp32_values) {}
template <typename T>
inline void apply() const {
PADDLE_MOBILE_THROW_EXCEPTION("Assign value: not supported data type.");
}
};
template <>
inline void AssignValueOpFunctor::apply<int>() const {
framework::TensorFromVector<int>(int32_values_, output_);
output_->Resize(framework::make_ddim(shape_));
}
template <>
inline void AssignValueOpFunctor::apply<float>() const {
framework::TensorFromVector<float>(fp32_values_, output_);
output_->Resize(framework::make_ddim(shape_));
}
template <>
bool AssignValueKernel<CPU, float>::Init(AssignValueParam<CPU>* param) {
return true;
}
template <>
void AssignValueKernel<CPU, float>::Compute(
const AssignValueParam<CPU>& param) {
framework::VisitDataType(
framework::ToDataType(param.dtype_),
AssignValueOpFunctor(param.output_, param.shape_, param.fp32_values_,
param.int32_values_));
}
} // namespace operators
} // namespace paddle_mobile
#endif // ASSIGN_VALUE_OP
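The Compute above dispatches on the runtime "dtype" attribute through framework::VisitDataType, which invokes the functor's apply<T>() for the matching element type (falling through to the exception-throwing primary template otherwise). A minimal sketch of that visitor dispatch, with an illustrative enum standing in for the real data-type tags in framework/data_type.h:

// Illustrative tag only; the real enum follows Paddle's VarType proto.
enum class DType { INT32, FP32 };

// Calls visitor.apply<T>() with T selected by the runtime tag, e.g.
// VisitDType(DType::INT32, functor) ends up in functor.apply<int>().
template <typename Visitor>
void VisitDType(DType dtype, const Visitor &visitor) {
  switch (dtype) {
    case DType::INT32:
      visitor.template apply<int>();
      break;
    case DType::FP32:
      visitor.template apply<float>();
      break;
  }
}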
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ASSIGN_VALUE_OP
#pragma once
#include <vector>
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype>
class AssignValueParam : public OpParam {
public:
AssignValueParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
Scope *scope)
: OpParam(inputs, outputs, attrs, scope) {
output_ = GET_VAR_AS_LOD_TENSOR("Out", outputs, *scope);
shape_ = OpParam::GetAttr<std::vector<int>>("shape", attrs);
fp32_values_ = OpParam::GetAttr<std::vector<float>>("fp32_values", attrs);
int32_values_ = OpParam::GetAttr<std::vector<int>>("int32_values", attrs);
dtype_ = OpParam::GetAttr<int>("dtype", attrs);
}
public:
framework::LoDTensor *output_;
std::vector<int> shape_;
std::vector<float> fp32_values_;
std::vector<int> int32_values_;
int dtype_;
};
DECLARE_KERNEL(AssignValue, AssignValueParam);
} // namespace operators
} // namespace paddle_mobile
#endif // ASSIGN_VALUE_OP
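For concreteness, assign_value carries its constant entirely in attributes; a worked example of how the fields above combine (attribute values illustrative):

// attrs: shape = {2, 2}, dtype = <int32 tag>, int32_values = {1, 2, 3, 4}
// => the kernel copies {1, 2, 3, 4} into Out and resizes it to a 2x2
//    tensor, i.e. [[1, 2], [3, 4]]. The concrete integer value of the
//    dtype tag follows Paddle's VarType proto and is not restated here.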
@@ -22,9 +22,6 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
#define GET_VAR_AS_LOD_TENSOR(name, name_dict, scope) \
OpParam::GetVarValue<framework::LoDTensor>(name, name_dict, scope)
template <typename Dtype>
class BeamSearchParam : public OpParam {
public:
......
@@ -22,9 +22,6 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
#define GET_VAR_AS_LOD_TENSOR(name, name_dict, scope) \
OpParam::GetVarValue<framework::LoDTensor>(name, name_dict, scope)
template <typename Dtype>
class OnehotParam : public OpParam {
public:
......
@@ -434,6 +434,12 @@ class OpParam {
}
};
#define GET_VAR_AS_TENSOR(name, name_dict, scope) \
OpParam::GetVarValue<framework::Tensor>(name, name_dict, scope)
#define GET_VAR_AS_LOD_TENSOR(name, name_dict, scope) \
OpParam::GetVarValue<framework::LoDTensor>(name, name_dict, scope)
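Expansion example for the macros hoisted here (this mirrors how AssignValueParam fetches its output variable above):

// GET_VAR_AS_LOD_TENSOR("Out", outputs, *scope)
// expands to
// OpParam::GetVarValue<framework::LoDTensor>("Out", outputs, *scope)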
template <typename Dtype>
class ConvParam : public OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
......
@@ -320,6 +320,7 @@ if(NOT FOUND_MATCH)
set(BEAM_SEARCH_DECODE_OP ON)
set(PAD2D_OP ON)
set(ONE_HOT_OP ON)
set(ASSIGN_VALUE_OP ON)
endif()
# option(BATCHNORM_OP "" ON)
@@ -646,3 +647,6 @@ endif()
if (ONE_HOT_OP)
add_definitions(-DONE_HOT_OP)
endif()
if (ASSIGN_VALUE_OP)
add_definitions(-DASSIGN_VALUE_OP)
endif()