From 63df309138134d40bded1ef6f1dd888f4f7e7546 Mon Sep 17 00:00:00 2001 From: hjchen2 Date: Sat, 30 Mar 2019 21:57:22 +0800 Subject: [PATCH] improve program optimization, add assign_value and flatten2 ops --- src/framework/load_ops.h | 4 + .../program-optimize/program_optimize.cpp | 82 +++++++------------ src/operators/assign_value_op.cpp | 37 +++++++++ src/operators/assign_value_op.h | 33 ++++++++ src/operators/flatten_op.cpp | 13 +-- src/operators/flatten_op.h | 11 ++- .../kernel/arm/assign_value_kernel.cpp | 73 +++++++++++++++++ src/operators/kernel/assign_value_kernel.h | 53 ++++++++++++ src/operators/kernel/beam_search_kernel.h | 3 - src/operators/kernel/one_hot_kernel.h | 3 - src/operators/op_param.h | 6 ++ tools/op.cmake | 4 + 12 files changed, 251 insertions(+), 71 deletions(-) create mode 100644 src/operators/assign_value_op.cpp create mode 100644 src/operators/assign_value_op.h create mode 100644 src/operators/kernel/arm/assign_value_kernel.cpp create mode 100644 src/operators/kernel/assign_value_kernel.h diff --git a/src/framework/load_ops.h b/src/framework/load_ops.h index 983a544cda..88dc901b4c 100644 --- a/src/framework/load_ops.h +++ b/src/framework/load_ops.h @@ -152,6 +152,7 @@ LOAD_OP1(prelu, CPU); #endif #ifdef FLATTEN_OP LOAD_OP1(flatten, CPU); +LOAD_OP1(flatten2, CPU); #endif #ifdef FUSION_CONVBNADDRELU_OP LOAD_OP3(fusion_conv_bn_add_relu, CPU, GPU_CL, FPGA); @@ -331,3 +332,6 @@ LOAD_OP1(pad2d, CPU); #ifdef ONE_HOT_OP LOAD_OP1(one_hot, CPU); #endif +#ifdef ASSIGN_VALUE_OP +LOAD_OP1(assign_value, CPU); +#endif diff --git a/src/framework/program/program-optimize/program_optimize.cpp b/src/framework/program/program-optimize/program_optimize.cpp index ed523a9851..eba27314ad 100644 --- a/src/framework/program/program-optimize/program_optimize.cpp +++ b/src/framework/program/program-optimize/program_optimize.cpp @@ -14,6 +14,7 @@ limitations under the License.
*/ #include "framework/program/program-optimize/program_optimize.h" #include +#include #include "framework/program/program-optimize/fusion_op_register.h" namespace paddle_mobile { @@ -22,7 +23,6 @@ namespace framework { std::shared_ptr ProgramOptimize::FusionOptimize( std::shared_ptr ori_des, bool add_split) { - // ProgramDesc *optimize_program = new ProgramDesc(*ori_des); std::shared_ptr optimize_program = std::make_shared(*ori_des); current_block_ = optimize_program->Blocks().size(); @@ -35,51 +35,35 @@ std::shared_ptr ProgramOptimize::FusionOptimize( std::pair, std::unordered_map>>>> type_map; - - std::unordered_map output_has; - std::vector> nodes; - std::shared_ptr begin_node; + auto block = optimize_program->Block(i); - // DLOG << " ops size: " << block->Ops().size(); for (int j = 0; j < block->Ops().size(); ++j) { auto op = block->Ops()[j]; - auto op_type = op->Type(); - if (op_input_output_key.find(op->Type()) == op_input_output_key.end()) { - LOG(kLOG_ERROR) << "has not support op return null " - << " op type: " << op->Type(); - return nullptr; - } - std::shared_ptr node = std::make_shared(op); - nodes.push_back(node); - - // - type_map[op->Type()].push_back({node, output_nodes}); - if (j == 0) { begin_node = node; } - auto input_keys = op_input_output_key.at(op->Type()).first; - for (auto input_key : input_keys) { - auto op_inputs = op->Input(input_key); - for (int l = 0; l < op_inputs.size(); ++l) { - std::string input_key = op_inputs[l]; - if (output_nodes.find(input_key) != output_nodes.end()) { - auto input_node = output_nodes[input_key]; + const std::string op_type = op->Type(); + nodes.push_back(node); + type_map[op_type].push_back({node, output_nodes}); + const VariableNameMap &op_inputs = op->GetInputs(); + const VariableNameMap &op_outpus = op->GetOutputs(); + + for (const auto &input : op_inputs) { + for (const auto &input_name : input.second) { + if (output_nodes.find(input_name) != output_nodes.end()) { + auto input_node = 
output_nodes[input_name]; *input_node > node; } } } - auto output_keys = op_input_output_key.at(op_type).second; - - for (auto output_key : output_keys) { - auto op_outputs = op->Output(output_key); - for (int k = 0; k < op_outputs.size(); ++k) { - output_nodes[op_outputs[k]] = node; + for (const auto &output : op_outpus) { + for (const auto &output_name : output.second) { + output_nodes[output_name] = node; } } } @@ -97,14 +81,13 @@ std::shared_ptr ProgramOptimize::FusionOptimize( auto depth = matcher->BeginNode().Depth(); auto sub_node = match_node->To(depth); - // DLOG << " sub node: " << *sub_node; + // DLOG << " sub node: " << *sub_node; if (*sub_node == matcher->BeginNode()) { bool can_folder = true; auto relationship_map = sub_node->Relationship(); for (auto to_check : matcher->NeedCheck()) { - // if (node_has) auto nodes = (*sub_node)[to_check.first]; for (auto node : nodes) { auto inputs_to_check = @@ -126,13 +109,8 @@ std::shared_ptr ProgramOptimize::FusionOptimize( continue; } - // DLOG << " match success " << " fusion node: \n" << - // matcher->BeginNode() << "\nsub node: \n" << *sub_node; - // DLOG << "match node\n"<< *match_node; - std::vector> removed_nodes; matcher->FolderNodes(match_node.get(), &removed_nodes); - for (int k = removed_nodes.size() - 1; k >= 0; --k) { auto removed_node = removed_nodes[k]; auto removed_ite = @@ -170,12 +148,12 @@ void ProgramOptimize::GenerateOps( Node *current_node) { if (current_node->inputs_.size() > 1 && input_node != current_node->inputs_.back()) { - DLOG << " current type " << current_node->type_; + DLOG << " current type " << current_node->Type(); DLOG << " inputs size of current node > 0 "; for (int i = 0; i < current_node->inputs_.size(); ++i) { - DLOG << " input i: " << current_node->inputs_[i]->type_; + DLOG << " input i: " << current_node->inputs_[i]->Type(); } return; @@ -201,9 +179,11 @@ void ProgramOptimize::GenerateOps( } bool can_add_split = false; + const auto current_desc = 
current_node->OpDescOfNode(); + const VariableNameMap ¤t_op_inputs = current_desc->GetInputs(); + const VariableNameMap ¤t_op_outputs = current_desc->GetOutputs(); // 如果当前节点有多个输出 并且 只有当前节点对应的 op_desc_ 输出数为 1 时支持 - if (current_node->outputs_.size() > 1 && - op_input_output_key[current_node->op_desc_->type_].second.size() == 1) { + if (current_node->outputs_.size() > 1 && current_op_outputs.size() == 1) { can_add_split = true; // 遍历当前节点的 output 节点 @@ -217,18 +197,15 @@ void ProgramOptimize::GenerateOps( //与节点关联的 OpDesc std::shared_ptr &op_desc = output->op_desc_; - //获取这个 op 的 inputs key 和 outputs key - auto inputs_and_outputs = op_input_output_key[op_desc->type_]; + const VariableNameMap &op_inputs = op_desc->GetInputs(); + const VariableNameMap &op_outputs = op_desc->GetOutputs(); //判断现在 是否存在这个 op //判断这个 output 和 input key 的 size 等于 1 - if (op_input_output_key.find(op_desc->type_) != - op_input_output_key.end() && - inputs_and_outputs.first.size() == 1 && - inputs_and_outputs.second.size() == 1) { - auto inputs_of_output = op_desc->Input(inputs_and_outputs.first[0]); - auto outputs_of_output = op_desc->Output(inputs_and_outputs.second[0]); + if (op_outputs.size() == 1 && op_inputs.size() == 1) { + auto inputs_of_output = op_inputs.begin()->second; + auto outputs_of_output = op_outputs.begin()->second; // 判断一下, 如果输入和输出没有同名, 是支持的 for (int i = 0; i < inputs_of_output.size(); ++i) { @@ -243,7 +220,7 @@ void ProgramOptimize::GenerateOps( } } } else { // 如果模型中包含没有的 op, 则不支持添加 split - DLOG << "找不到 这个 op 类型: " << output->op_desc_->type_; + DLOG << "找不到 这个 op 类型: " << output->op_desc_->Type(); can_add_split = false; } } @@ -312,9 +289,6 @@ void ProgramOptimize::GenerateOps( void ProgramOptimize::GenerateOps( std::vector> *op_descs, Node *begin_node, bool can_add_split) { - // std::vector> *op_desc, - // Node *input_node, Node *current_node, bool adding_thread, int - // thread_num if (can_add_split) { this->GenerateOps(op_descs, begin_node, begin_node, false, -1, nullptr); } 
else { diff --git a/src/operators/assign_value_op.cpp b/src/operators/assign_value_op.cpp new file mode 100644 index 0000000000..49494929de --- /dev/null +++ b/src/operators/assign_value_op.cpp @@ -0,0 +1,37 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef ASSIGN_VALUE_OP + +#include "operators/assign_value_op.h" + +namespace paddle_mobile { +namespace operators { + +template +void AssignValueOp::InferShape() const { + const auto &shape = this->param_.shape_; + this->param_.output_->Resize(framework::make_ddim(shape)); +} + +} // namespace operators +} // namespace paddle_mobile + +namespace ops = paddle_mobile::operators; + +#ifdef PADDLE_MOBILE_CPU +REGISTER_OPERATOR_CPU(assign_value, ops::AssignValueOp); +#endif + +#endif // ASSIGN_VALUE_OP diff --git a/src/operators/assign_value_op.h b/src/operators/assign_value_op.h new file mode 100644 index 0000000000..ce319d333a --- /dev/null +++ b/src/operators/assign_value_op.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef ASSIGN_VALUE_OP + +#pragma once + +#include + +#include "framework/operator.h" +#include "operators/kernel/assign_value_kernel.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +DECLARE_OPERATOR(AssignValue, AssignValueParam, AssignValueKernel); + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/flatten_op.cpp b/src/operators/flatten_op.cpp index 932f780d03..844053b21c 100644 --- a/src/operators/flatten_op.cpp +++ b/src/operators/flatten_op.cpp @@ -31,29 +31,22 @@ void FlattenOp::InferShape() const { "The axis should be greater than or equal to 0."); auto &in_dims = this->param_.InputX()->dims(); - // const auto &in_dims = ctx->GetInputDim("X"); PADDLE_MOBILE_ENFORCE( axis <= in_dims.size(), "The axis should be less than or equal to input tensor's rank."); const auto &out_dims = GetOutputShape(axis, in_dims); this->param_.Out()->Resize(in_dims); - // todo supprot lodtensor - // if (in_dims[0] == out_dims[0]) { - // // Only pass LoD when the first dimension of output and Input(X) - // // are the same. 
- // ctx->ShareLoD("X", "Out"); - // } } } // namespace operators } // namespace paddle_mobile namespace ops = paddle_mobile::operators; + #ifdef PADDLE_MOBILE_CPU REGISTER_OPERATOR_CPU(flatten, ops::FlattenOp); -#endif -#ifdef PADDLE_MOBILE_FPGA +REGISTER_OPERATOR_CPU(flatten2, ops::Flatten2Op); #endif -#endif +#endif // FLATTEN_OP diff --git a/src/operators/flatten_op.h b/src/operators/flatten_op.h index daad2d82d8..ef97994dc1 100644 --- a/src/operators/flatten_op.h +++ b/src/operators/flatten_op.h @@ -25,6 +25,7 @@ limitations under the License. */ namespace paddle_mobile { namespace operators { + inline std::vector GetOutputShape(const int axis, const framework::DDim &in_dims) { int64_t outer = 1, inner = 1; @@ -40,7 +41,6 @@ inline std::vector GetOutputShape(const int axis, out_shape[1] = static_cast(inner); return out_shape; } -using paddle_mobile::framework::Tensor; template class FlattenOp : public framework::OperatorWithKernel< @@ -56,6 +56,15 @@ class FlattenOp : public framework::OperatorWithKernel< void InferShape() const override; }; +template +class Flatten2Op : public FlattenOp { + public: + Flatten2Op(const std::string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, + const framework::AttributeMap &attrs, framework::Scope *scope) + : FlattenOp(type, inputs, outputs, attrs, scope) {} +}; + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/arm/assign_value_kernel.cpp b/src/operators/kernel/arm/assign_value_kernel.cpp new file mode 100644 index 0000000000..7390f77ed1 --- /dev/null +++ b/src/operators/kernel/arm/assign_value_kernel.cpp @@ -0,0 +1,73 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef ASSIGN_VALUE_OP + +#include "operators/kernel/assign_value_kernel.h" +#include "framework/data_type.h" + +namespace paddle_mobile { +namespace operators { + +struct AssignValueOpFunctor { + framework::LoDTensor* output_; + const std::vector shape_; + const std::vector int32_values_; + const std::vector fp32_values_; + + AssignValueOpFunctor(framework::LoDTensor* output, + const std::vector& shape, + const std::vector& fp32_values, + const std::vector& int32_values) + : output_(output), + shape_(shape), + int32_values_(int32_values), + fp32_values_(fp32_values) {} + + template + inline void apply() const { + PADDLE_MOBILE_THROW_EXCEPTION("Assign value: not supported data type."); + } +}; + +template <> +inline void AssignValueOpFunctor::apply() const { + framework::TensorFromVector(int32_values_, output_); + output_->Resize(framework::make_ddim(shape_)); +} + +template <> +inline void AssignValueOpFunctor::apply() const { + framework::TensorFromVector(fp32_values_, output_); + output_->Resize(framework::make_ddim(shape_)); +} + +template <> +bool AssignValueKernel::Init(AssignValueParam* param) { + return true; +} + +template <> +void AssignValueKernel::Compute( + const AssignValueParam& param) { + framework::VisitDataType( + framework::ToDataType(param.dtype_), + AssignValueOpFunctor(param.output_, param.shape_, param.fp32_values_, + param.int32_values_)); +} + +} // namespace operators +} // namespace paddle_mobile + +#endif // ASSIGN_VALUE_OP diff --git a/src/operators/kernel/assign_value_kernel.h b/src/operators/kernel/assign_value_kernel.h new file mode 
100644 index 0000000000..5fae921876 --- /dev/null +++ b/src/operators/kernel/assign_value_kernel.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef ASSIGN_VALUE_OP + +#pragma once + +#include +#include "framework/operator.h" +#include "operators/op_param.h" + +namespace paddle_mobile { +namespace operators { + +template +class AssignValueParam : public OpParam { + public: + AssignValueParam(const VariableNameMap &inputs, + const VariableNameMap &outputs, const AttributeMap &attrs, + Scope *scope) + : OpParam(inputs, outputs, attrs, scope) { + output_ = GET_VAR_AS_LOD_TENSOR("Out", outputs, *scope); + shape_ = OpParam::GetAttr>("shape", attrs); + fp32_values_ = OpParam::GetAttr>("fp32_values", attrs); + int32_values_ = OpParam::GetAttr>("int32_values", attrs); + dtype_ = OpParam::GetAttr("dtype", attrs); + } + + public: + framework::LoDTensor *output_; + std::vector shape_; + std::vector fp32_values_; + std::vector int32_values_; + int dtype_; +}; + +DECLARE_KERNEL(AssignValue, AssignValueParam); + +} // namespace operators +} // namespace paddle_mobile + +#endif // ASSIGN_VALUE_OP diff --git a/src/operators/kernel/beam_search_kernel.h b/src/operators/kernel/beam_search_kernel.h index 38fe162b24..bb4a3ced17 100644 --- a/src/operators/kernel/beam_search_kernel.h +++ b/src/operators/kernel/beam_search_kernel.h @@ -22,9 +22,6 @@ limitations under the License. 
*/ namespace paddle_mobile { namespace operators { -#define GET_VAR_AS_LOD_TENSOR(name, name_dict, scope) \ - OpParam::GetVarValue(name, name_dict, scope) - template class BeamSearchParam : public OpParam { public: diff --git a/src/operators/kernel/one_hot_kernel.h b/src/operators/kernel/one_hot_kernel.h index fd883cabee..2cb2e59eb3 100644 --- a/src/operators/kernel/one_hot_kernel.h +++ b/src/operators/kernel/one_hot_kernel.h @@ -22,9 +22,6 @@ limitations under the License. */ namespace paddle_mobile { namespace operators { -#define GET_VAR_AS_LOD_TENSOR(name, name_dict, scope) \ - OpParam::GetVarValue(name, name_dict, scope) - template class OnehotParam : public OpParam { public: diff --git a/src/operators/op_param.h b/src/operators/op_param.h index e144bf623f..52d6f6693e 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -434,6 +434,12 @@ class OpParam { } }; +#define GET_VAR_AS_TENSOR(name, name_dict, scope) \ + OpParam::GetVarValue(name, name_dict, scope) + +#define GET_VAR_AS_LOD_TENSOR(name, name_dict, scope) \ + OpParam::GetVarValue(name, name_dict, scope) + template class ConvParam : public OpParam { typedef typename DtypeTensorTrait::gtype GType; diff --git a/tools/op.cmake b/tools/op.cmake index 94dc0b4215..209df1dd7d 100755 --- a/tools/op.cmake +++ b/tools/op.cmake @@ -320,6 +320,7 @@ if(NOT FOUND_MATCH) set(BEAM_SEARCH_DECODE_OP ON) set(PAD2D_OP ON) set(ONE_HOT_OP ON) + set(ASSIGN_VALUE_OP ON) endif() # option(BATCHNORM_OP "" ON) @@ -646,3 +647,6 @@ endif() if (ONE_HOT_OP) add_definitions(-DONE_HOT_OP) endif() +if (ASSIGN_VALUE_OP) + add_definitions(-DASSIGN_VALUE_OP) +endif() -- GitLab