提交 a1d200a5 编写于 作者: N nhzlx

cherry-pick from feature/anakin-engine: Anakin support facebox #16111

上级 a32d4200
...@@ -71,6 +71,7 @@ pass_library(transpose_flatten_concat_fuse_pass inference) ...@@ -71,6 +71,7 @@ pass_library(transpose_flatten_concat_fuse_pass inference)
pass_library(identity_scale_op_clean_pass base) pass_library(identity_scale_op_clean_pass base)
pass_library(sync_batch_norm_pass base) pass_library(sync_batch_norm_pass base)
pass_library(runtime_context_cache_pass base) pass_library(runtime_context_cache_pass base)
pass_library(simplify_anakin_detection_pattern_pass inference)
# There may be many transpose-flatten structures in a model, and the output of # There may be many transpose-flatten structures in a model, and the output of
# these structures will be used as inputs to the concat Op. This pattern will # these structures will be used as inputs to the concat Op. This pattern will
...@@ -81,6 +82,10 @@ foreach (index RANGE 3 6) ...@@ -81,6 +82,10 @@ foreach (index RANGE 3 6)
file(APPEND ${pass_file} "USE_PASS(transpose_flatten${index}_concat_fuse_pass);\n") file(APPEND ${pass_file} "USE_PASS(transpose_flatten${index}_concat_fuse_pass);\n")
endforeach() endforeach()
foreach (index RANGE 3 6)
file(APPEND ${pass_file} "USE_PASS(simplify_anakin_detection_pattern_pass${index});\n")
endforeach()
if(WITH_MKLDNN) if(WITH_MKLDNN)
pass_library(mkldnn_placement_pass base mkldnn) pass_library(mkldnn_placement_pass base mkldnn)
pass_library(depthwise_conv_mkldnn_pass base mkldnn) pass_library(depthwise_conv_mkldnn_pass base mkldnn)
......
...@@ -1454,6 +1454,136 @@ PDNode *patterns::TransposeFlattenConcat::operator()( ...@@ -1454,6 +1454,136 @@ PDNode *patterns::TransposeFlattenConcat::operator()(
return concat_out; return concat_out;
} }
PDNode *patterns::AnakinDetectionPattern::operator()(
std::vector<PDNode *> conv_in, int times) {
// The times represents the repeat times of the
// {prior_box, prior_box_loc_out, flatten, prior_box_var_out, reshape}
const int kNumFields = 7;
const int kPriorBoxLocOffset = 1;
const int kReshape1Offset = 2;
const int kReshape1OutOffset = 3;
const int kPriorBoxVarOffset = 4;
const int kReshape2Offset = 5;
const int kReshape2OutOffset = 6;
const int kBoxCoderThirdInputOffset = times;
const int kMultiClassSecondInputNmsOffset = times + 1;
std::vector<PDNode *> nodes;
for (int i = 0; i < times; i++) {
nodes.push_back(
pattern->NewNode(GetNodeName("prior_box" + std::to_string(i)))
->assert_is_op("density_prior_box"));
nodes.push_back(pattern->NewNode(GetNodeName("box_out" + std::to_string(i)))
->assert_is_op_output("density_prior_box", "Boxes")
->assert_is_op_input("reshape2", "X")
->AsIntermediate());
nodes.push_back(
pattern->NewNode(GetNodeName("reshape1" + std::to_string(i)))
->assert_is_op("reshape2"));
nodes.push_back(
pattern->NewNode(GetNodeName("reshape1_out" + std::to_string(i)))
->assert_is_op_output("reshape2")
->assert_is_op_nth_input("concat", "X", i)
->AsIntermediate());
nodes.push_back(
pattern->NewNode(GetNodeName("box_var_out" + std::to_string(i)))
->assert_is_op_output("density_prior_box", "Variances")
->assert_is_op_input("reshape2", "X")
->AsIntermediate());
nodes.push_back(
pattern->NewNode(GetNodeName("reshape2" + std::to_string(i)))
->assert_is_op("reshape2"));
nodes.push_back(
pattern->NewNode(GetNodeName("reshape2_out" + std::to_string(i)))
->assert_is_op_output("reshape2")
->assert_is_op_nth_input("concat", "X", i)
->AsIntermediate());
}
auto concat_op1 = pattern->NewNode(GetNodeName("concat1"))
->assert_is_op("concat")
->assert_op_has_n_inputs("concat", times);
auto concat_out1 = pattern->NewNode(GetNodeName("concat1_out"))
->assert_is_op_output("concat")
->AsIntermediate();
auto concat_op2 = pattern->NewNode(GetNodeName("concat2"))
->assert_is_op("concat")
->assert_op_has_n_inputs("concat", times);
auto concat_out2 = pattern->NewNode(GetNodeName("concat2_out"))
->assert_is_op_output("concat")
->AsIntermediate();
auto box_coder_op = pattern->NewNode(GetNodeName("box_coder"))
->assert_is_op("box_coder")
->assert_op_has_n_inputs("box_coder", 3);
auto box_coder_out = pattern->NewNode(GetNodeName("box_coder_out"))
->assert_is_op_output("box_coder")
->AsIntermediate();
auto multiclass_nms_op = pattern->NewNode(GetNodeName("multiclass_nms"))
->assert_is_op("multiclass_nms")
->assert_op_has_n_inputs("multiclass_nms", 2);
auto multiclass_nms_out = pattern->NewNode(GetNodeName("multiclass_nms_out"))
->assert_is_op_output("multiclass_nms")
->AsOutput();
std::vector<PDNode *> reshape1_outs;
std::vector<PDNode *> reshape2_outs;
for (int i = 0; i < times; i++) {
conv_in[i]->AsInput();
// prior_box
nodes[i * kNumFields]->LinksFrom({conv_in[i]});
// prior_box box out
nodes[i * kNumFields + kPriorBoxLocOffset]->LinksFrom(
{nodes[i * kNumFields]});
// reshape
nodes[i * kNumFields + kReshape1Offset]->LinksFrom(
{nodes[i * kNumFields + kPriorBoxLocOffset]});
// reshape_out
nodes[i * kNumFields + kReshape1OutOffset]->LinksFrom(
{nodes[i * kNumFields + kReshape1Offset]});
nodes[i * kNumFields + kPriorBoxVarOffset]->LinksFrom(
{nodes[i * kNumFields]});
// reshape
nodes[i * kNumFields + kReshape2Offset]->LinksFrom(
{nodes[i * kNumFields + kPriorBoxVarOffset]});
// reshape_out
nodes[i * kNumFields + kReshape2OutOffset]->LinksFrom(
{nodes[i * kNumFields + kReshape2Offset]});
reshape1_outs.push_back(nodes[i * kNumFields + kReshape1OutOffset]);
reshape2_outs.push_back(nodes[i * kNumFields + kReshape2OutOffset]);
}
concat_op1->LinksFrom(reshape1_outs);
concat_op2->LinksFrom(reshape2_outs);
concat_out1->LinksFrom({concat_op1});
concat_out2->LinksFrom({concat_op2});
conv_in[kBoxCoderThirdInputOffset]->AsInput();
conv_in[kMultiClassSecondInputNmsOffset]->AsInput();
box_coder_op->LinksFrom(
{concat_out1, concat_out2, conv_in[kBoxCoderThirdInputOffset]});
box_coder_out->LinksFrom({box_coder_op});
multiclass_nms_op
->LinksFrom({box_coder_out, conv_in[kMultiClassSecondInputNmsOffset]})
.LinksTo({multiclass_nms_out});
return multiclass_nms_out;
}
} // namespace ir } // namespace ir
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -841,6 +841,21 @@ struct TransposeFlattenConcat : public PatternBase { ...@@ -841,6 +841,21 @@ struct TransposeFlattenConcat : public PatternBase {
} }
}; };
struct AnakinDetectionPattern : public PatternBase {
AnakinDetectionPattern(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "anakin_detect_pattern") {}
PDNode* operator()(std::vector<PDNode*> conv_inputs, int times);
std::string GetNodeName(const std::string& op_type) {
return PDNodeName(name_scope_, repr_, id_, op_type);
}
PDNode* GetPDNode(const std::string& op_type) {
return pattern->RetrieveNode(GetNodeName(op_type));
}
};
} // namespace patterns } // namespace patterns
// Link two ir::Nodes from each other. // Link two ir::Nodes from each other.
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.h"
namespace paddle {
namespace framework {
namespace ir {
template <int times>
std::unique_ptr<ir::Graph> SimplifyAnakinDetectionPatternPass<times>::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const {
const std::string pattern_name =
"simplify_anakin_detection_pattern_pass" + std::to_string(times);
FusePassBase::Init(pattern_name, graph.get());
GraphPatternDetector gpd;
std::vector<PDNode *> input_nodes;
for (int i = 0; i < times; i++) {
input_nodes.push_back(gpd.mutable_pattern()
->NewNode("x" + std::to_string(i))
->assert_is_op_input("density_prior_box", "Input")
->AsInput());
}
input_nodes.push_back(gpd.mutable_pattern()
->NewNode("x" + std::to_string(times))
->assert_is_op_input("box_coder", "TargetBox")
->AsInput());
input_nodes.push_back(gpd.mutable_pattern()
->NewNode("x" + std::to_string(times + 1))
->assert_is_op_input("multiclass_nms", "Scores")
->AsInput());
patterns::AnakinDetectionPattern pattern(gpd.mutable_pattern(), pattern_name);
pattern(input_nodes, times);
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) {
const int kNumFields = 7;
const int kPriorBoxLocOffset = 1;
const int kReshape1Offset = 2;
const int kReshape1OutOffset = 3;
const int kPriorBoxVarOffset = 4;
const int kReshape2Offset = 5;
const int kReshape2OutOffset = 6;
std::vector<Node *> nodes;
for (int i = 0; i < times; i++) {
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("prior_box" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("box_out" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("reshape1" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("reshape1_out" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("reshape2" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("reshape2_out" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("box_var_out" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("prior_box" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("box_out" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("reshape1" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("reshape1_out" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("box_var_out" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("reshape2" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("reshape2_out" + std::to_string(i))));
}
Node *concat_op1 = subgraph.at(pattern.GetPDNode("concat1"));
Node *concat_out1 = subgraph.at(pattern.GetPDNode("concat1_out"));
Node *concat_op2 = subgraph.at(pattern.GetPDNode("concat2"));
Node *concat_out2 = subgraph.at(pattern.GetPDNode("concat2_out"));
Node *box_coder_third_input = subgraph.at(input_nodes[times]);
Node *box_coder_op = subgraph.at(pattern.GetPDNode("box_coder"));
Node *box_coder_out = subgraph.at(pattern.GetPDNode("box_coder_out"));
Node *multiclass_nms_second_input = subgraph.at(input_nodes[times + 1]);
Node *multiclass_nms = subgraph.at(pattern.GetPDNode("multiclass_nms"));
Node *multiclass_nms_out =
subgraph.at(pattern.GetPDNode("multiclass_nms_out"));
std::string code_type =
boost::get<std::string>(box_coder_op->Op()->GetAttr("code_type"));
bool box_normalized =
boost::get<bool>(box_coder_op->Op()->GetAttr("box_normalized"));
// auto variance =
// boost::get<std::vector<float>>(box_coder_op->Op()->GetAttr("variance"));
int background_label =
boost::get<int>(multiclass_nms->Op()->GetAttr("background_label"));
float score_threshold =
boost::get<float>(multiclass_nms->Op()->GetAttr("score_threshold"));
int nms_top_k = boost::get<int>(multiclass_nms->Op()->GetAttr("nms_top_k"));
float nms_threshold =
boost::get<float>(multiclass_nms->Op()->GetAttr("nms_threshold"));
float nms_eta = boost::get<float>(multiclass_nms->Op()->GetAttr("nms_eta"));
int keep_top_k =
boost::get<int>(multiclass_nms->Op()->GetAttr("keep_top_k"));
std::vector<std::string> concat1_input_names;
for (int i = 0; i < times; i++) {
concat1_input_names.push_back(
nodes[i * kNumFields + kPriorBoxLocOffset]->Name());
}
int axis = boost::get<int>(concat_op1->Op()->GetAttr("axis"));
framework::OpDesc concat1_desc;
concat1_desc.SetType("concat");
concat1_desc.SetInput("X", concat1_input_names);
concat1_desc.SetAttr("axis", axis);
concat1_desc.SetOutput("Out", {concat_out1->Name()});
auto *new_add_concat_op = graph->CreateOpNode(&concat1_desc);
for (int i = 0; i < times; i++) {
nodes[i * kNumFields + kPriorBoxLocOffset]->outputs.push_back(
new_add_concat_op);
new_add_concat_op->inputs.push_back(
nodes[i * kNumFields + kPriorBoxLocOffset]);
}
framework::OpDesc new_op_desc;
new_op_desc.SetType("detection_out");
new_op_desc.SetInput("PriorBox", {concat_out1->Name()});
new_op_desc.SetInput("TargetBox", {box_coder_third_input->Name()});
new_op_desc.SetInput("Scores", {multiclass_nms_second_input->Name()});
new_op_desc.SetAttr("code_type", code_type);
new_op_desc.SetAttr("box_normalized", box_normalized);
new_op_desc.SetAttr("background_label", background_label);
new_op_desc.SetAttr("score_threshold", score_threshold);
new_op_desc.SetAttr("nms_top_k", nms_top_k);
new_op_desc.SetAttr("nms_threshold", nms_threshold);
new_op_desc.SetAttr("nms_eta", nms_eta);
new_op_desc.SetAttr("keep_top_k", keep_top_k);
new_op_desc.SetOutput("Out", {multiclass_nms_out->Name()});
new_op_desc.Flush();
// Create a new node for the fused op.
auto *detection_out_op = graph->CreateOpNode(&new_op_desc);
std::unordered_set<const Node *> delete_nodes;
for (int i = 0; i < times; i++) {
nodes[i * kNumFields + kPriorBoxLocOffset]->outputs.push_back(concat_op1);
delete_nodes.insert(nodes[i * kNumFields + kReshape1Offset]);
delete_nodes.insert(nodes[i * kNumFields + kReshape1OutOffset]);
delete_nodes.insert(nodes[i * kNumFields + kPriorBoxVarOffset]);
delete_nodes.insert(nodes[i * kNumFields + kReshape2Offset]);
delete_nodes.insert(nodes[i * kNumFields + kReshape2OutOffset]);
}
delete_nodes.insert(concat_op1);
delete_nodes.insert(concat_op2);
delete_nodes.insert(concat_out2);
delete_nodes.insert(box_coder_op);
delete_nodes.insert(box_coder_out);
delete_nodes.insert(multiclass_nms);
new_add_concat_op->outputs.push_back(concat_out1);
concat_out1->inputs.push_back(new_add_concat_op);
detection_out_op->inputs.push_back(concat_out1);
detection_out_op->inputs.push_back(box_coder_third_input);
detection_out_op->inputs.push_back(multiclass_nms_second_input);
detection_out_op->outputs.push_back(multiclass_nms_out);
concat_out1->outputs.push_back(detection_out_op);
box_coder_third_input->outputs.push_back(detection_out_op);
multiclass_nms_second_input->outputs.push_back(detection_out_op);
multiclass_nms_out->inputs.push_back(detection_out_op);
// Delete the unneeded nodes.
GraphSafeRemoveNodes(graph.get(), delete_nodes);
};
gpd(graph.get(), handler);
return graph;
}
template class SimplifyAnakinDetectionPatternPass<1>;
template class SimplifyAnakinDetectionPatternPass<3>;
template class SimplifyAnakinDetectionPatternPass<4>;
template class SimplifyAnakinDetectionPatternPass<5>;
template class SimplifyAnakinDetectionPatternPass<6>;
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(simplify_anakin_detection_pattern_pass,
paddle::framework::ir::SimplifyAnakinDetectionPatternPass<1>);
REGISTER_PASS(simplify_anakin_detection_pattern_pass3,
paddle::framework::ir::SimplifyAnakinDetectionPatternPass<3>);
REGISTER_PASS(simplify_anakin_detection_pattern_pass4,
paddle::framework::ir::SimplifyAnakinDetectionPatternPass<4>);
REGISTER_PASS(simplify_anakin_detection_pattern_pass5,
paddle::framework::ir::SimplifyAnakinDetectionPatternPass<5>);
REGISTER_PASS(simplify_anakin_detection_pattern_pass6,
paddle::framework::ir::SimplifyAnakinDetectionPatternPass<6>);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <unordered_set>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace paddle {
namespace framework {
namespace ir {
// There may be many transpose-flatten structures in a model, and the output of
// these structures will be used as inputs to the concat Op. This pattern will
// be detected by our pass. The times here represents the repeat times of this
// structure.
template <int times>
class SimplifyAnakinDetectionPatternPass : public FusePassBase {
public:
virtual ~SimplifyAnakinDetectionPatternPass() {}
protected:
std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc DEPS anakin_engine framework_proto scope op_registry) elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc DEPS anakin_engine framework_proto scope op_registry)
cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op) cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op)
cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv) cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv)
cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter) cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter)
cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling) cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling)
cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split) cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split)
cc_test(test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split) cc_test(test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split)
cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS anakin_op_converter elementwise_add_op)
anakin_op_converter elementwise_add_op)
cc_test(test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter SERIAL) cc_test(test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter SERIAL)
cc_test(test_anakin_softmax SRCS test_softmax_op.cc DEPS anakin_op_converter softmax_op softmax) cc_test(test_anakin_softmax SRCS test_softmax_op.cc DEPS anakin_op_converter softmax_op softmax)
cc_test(test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter reshape_op) cc_test(test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter reshape_op)
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "paddle/fluid/inference/anakin/convert/batch_norm.h" #include "paddle/fluid/inference/anakin/convert/batch_norm.h"
#include <math.h> #include <math.h>
#include <algorithm>
#include <map> #include <map>
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -41,7 +42,6 @@ void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op, ...@@ -41,7 +42,6 @@ void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
auto output = op_desc.Output("Y").front(); auto output = op_desc.Output("Y").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Y").front(); auto op_name = op_desc.Type() + ":" + op_desc.Output("Y").front();
bool is_test = boost::get<bool>(op_desc.GetAttr("is_test"));
auto epsilon = boost::get<float>(op_desc.GetAttr("epsilon")); auto epsilon = boost::get<float>(op_desc.GetAttr("epsilon"));
auto bn_op_name = op_name + ":bn"; auto bn_op_name = op_name + ":bn";
......
...@@ -34,8 +34,8 @@ void ConcatOpConverter::operator()(const framework::proto::OpDesc &op, ...@@ -34,8 +34,8 @@ void ConcatOpConverter::operator()(const framework::proto::OpDesc &op,
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
int axis = boost::get<int>(op_desc.GetAttr("axis")); int axis = boost::get<int>(op_desc.GetAttr("axis"));
auto input_names = op_desc.Input("X"); auto input_names = op_desc.Input("X");
PADDLE_ENFORCE(axis > 0, // PADDLE_ENFORCE(axis > 0,
"The axis attr of Concat op should be large than 0 for trt"); // "The axis attr of Concat op should be large than 0 for trt");
auto y_name = op_desc.Output("Out").front(); auto y_name = op_desc.Output("Out").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/density_prior_box.h"
#include <algorithm>
#include <map>
#include <vector>
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::saber::NV;
using anakin::saber::Shape;
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope,
bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
auto input_name = op_desc.Input("Input").front();
auto image_name = op_desc.Input("Image").front();
auto output_name = op_desc.Output("Boxes").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Boxes").front();
auto fixed_sizes =
boost::get<std::vector<float>>(op_desc.GetAttr("fixed_sizes"));
auto fixed_ratios =
boost::get<std::vector<float>>(op_desc.GetAttr("fixed_ratios"));
auto densities = boost::get<std::vector<int>>(op_desc.GetAttr("densities"));
// lack flip
auto clip = boost::get<bool>(op_desc.GetAttr("clip"));
auto variances = boost::get<std::vector<float>>(op_desc.GetAttr("variances"));
// lack img_h, img_w
auto step_h = boost::get<float>(op_desc.GetAttr("step_h"));
auto step_w = boost::get<float>(op_desc.GetAttr("step_w"));
auto offset = boost::get<float>(op_desc.GetAttr("offset"));
std::vector<std::string> order = {"MIN", "COM", "MAX"};
std::vector<float> temp_v = {};
engine_->AddOp(op_name, "PriorBox", {input_name, image_name}, {output_name});
engine_->AddOpAttr<PTuple<float>>(op_name, "min_size", temp_v);
engine_->AddOpAttr<PTuple<float>>(op_name, "max_size", temp_v);
engine_->AddOpAttr<PTuple<float>>(op_name, "aspect_ratio", temp_v);
engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_sizes", fixed_sizes);
engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_ratios", fixed_ratios);
engine_->AddOpAttr<PTuple<int>>(op_name, "density", densities);
engine_->AddOpAttr(op_name, "is_flip", false);
engine_->AddOpAttr(op_name, "is_clip", clip);
engine_->AddOpAttr<PTuple<float>>(op_name, "variance", variances);
engine_->AddOpAttr(op_name, "img_h", static_cast<int>(0));
engine_->AddOpAttr(op_name, "img_w", static_cast<int>(0));
engine_->AddOpAttr(op_name, "step_h", step_h);
engine_->AddOpAttr(op_name, "step_w", step_w);
engine_->AddOpAttr(op_name, "offset", offset);
engine_->AddOpAttr<PTuple<std::string>>(op_name, "order", order);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
class DensityPriorBoxOpConverter : public AnakinOpConverter {
public:
DensityPriorBoxOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope,
bool test_mode) override;
virtual ~DensityPriorBoxOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/detection_out.h"
#include <algorithm>
#include <map>
using anakin::graph::GraphGlobalMem;
using anakin::AK_FLOAT;
using anakin::saber::NV;
using anakin::saber::Shape;
namespace paddle {
namespace inference {
namespace anakin {
void DetectionOutOpConverter::operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope,
bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
auto target_name = op_desc.Input("TargetBox").front();
auto prior_box_name = op_desc.Input("PriorBox").front();
auto scores_name = op_desc.Input("Scores").front();
auto output_name = op_desc.Output("Out").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto code_type = boost::get<std::string>(op_desc.GetAttr("code_type"));
auto background_label = boost::get<int>(op_desc.GetAttr("background_label"));
auto score_threshold = boost::get<float>(op_desc.GetAttr("score_threshold"));
auto nms_top_k = boost::get<int>(op_desc.GetAttr("nms_top_k"));
auto nms_threshold = boost::get<float>(op_desc.GetAttr("nms_threshold"));
auto nms_eta = boost::get<float>(op_desc.GetAttr("nms_eta"));
auto keep_top_k = boost::get<int>(op_desc.GetAttr("keep_top_k"));
std::string anakin_code_type;
if (code_type == "decode_center_size") {
anakin_code_type = "CENTER_SIZE";
} else if (code_type == "encode_center_size") {
PADDLE_THROW(
"Not support encode_center_size code_type in DetectionOut of anakin");
}
engine_->AddOp(op_name, "DetectionOutput",
{target_name, scores_name, prior_box_name}, {output_name});
engine_->AddOpAttr(op_name, "share_location", true);
engine_->AddOpAttr(op_name, "variance_encode_in_target", false);
engine_->AddOpAttr(op_name, "class_num", static_cast<int>(0));
engine_->AddOpAttr(op_name, "background_id", background_label);
engine_->AddOpAttr(op_name, "keep_top_k", keep_top_k);
engine_->AddOpAttr(op_name, "code_type", anakin_code_type);
engine_->AddOpAttr(op_name, "conf_thresh", score_threshold);
engine_->AddOpAttr(op_name, "nms_top_k", nms_top_k);
engine_->AddOpAttr(op_name, "nms_thresh", nms_threshold);
engine_->AddOpAttr(op_name, "nms_eta", nms_eta);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(detection_out, DetectionOutOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
class DetectionOutOpConverter : public AnakinOpConverter {
public:
DetectionOutOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope,
bool test_mode) override;
virtual ~DetectionOutOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
...@@ -34,20 +34,11 @@ void FlattenOpConverter::operator()(const framework::proto::OpDesc &op, ...@@ -34,20 +34,11 @@ void FlattenOpConverter::operator()(const framework::proto::OpDesc &op,
auto input = op_desc.Input("X").front(); auto input = op_desc.Input("X").front();
auto output = op_desc.Output("Out").front(); auto output = op_desc.Output("Out").front();
auto in_dims = scope.FindVar(input)->Get<framework::LoDTensor>().dims();
int axis = boost::get<int>(op_desc.GetAttr("axis")); int axis = boost::get<int>(op_desc.GetAttr("axis"));
PADDLE_ENFORCE(axis == 1,
"the anakin flatten op converter now only support aixs == 1.");
int inner = 1; std::vector<int> out_dims = {0, -1, 1, 1};
int outer = 1;
for (int i = 0; i < in_dims.size(); i++) {
if (i < axis) {
outer *= in_dims[i];
} else {
inner *= in_dims[i];
}
}
std::vector<int> out_dims = {1, outer, inner, 1};
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
engine_->AddOp(op_name, "Reshape", {input}, {output}); engine_->AddOp(op_name, "Reshape", {input}, {output});
engine_->AddOpAttr<PTuple<int>>(op_name, "dims", out_dims); engine_->AddOpAttr<PTuple<int>>(op_name, "dims", out_dims);
......
...@@ -47,6 +47,10 @@ class AnakinOpConverter { ...@@ -47,6 +47,10 @@ class AnakinOpConverter {
std::string op_type = op_desc.Type(); std::string op_type = op_desc.Type();
AnakinOpConverter *it = nullptr; AnakinOpConverter *it = nullptr;
if (op_type == "reshape2") op_type = "reshape";
if (op_type == "transpose2") op_type = "transpose";
if (op_type == "flatten2") op_type = "flatten";
if (!it) { if (!it) {
it = Registry<AnakinOpConverter>::Global().Lookup(op_type); it = Registry<AnakinOpConverter>::Global().Lookup(op_type);
} }
......
...@@ -44,6 +44,29 @@ TEST(concat_op, test) { ...@@ -44,6 +44,29 @@ TEST(concat_op, test) {
validator.Execute(1); validator.Execute(1);
} }
TEST(concat_op, test2) {
std::unordered_set<std::string> parameters({""});
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
validator.DeclInputVar("concat_x1", {1, 4});
validator.DeclInputVar("concat_x2", {3, 4});
validator.DeclInputVar("concat_x3", {2, 4});
validator.DeclOutputVar("concat_out", {6, 4});
// Prepare Op description
framework::OpDesc desc;
desc.SetType("concat");
desc.SetInput("X", {"concat_x1", "concat_x2", "concat_x3"});
desc.SetOutput("Out", {"concat_out"});
int axis = 0;
desc.SetAttr("axis", axis);
validator.SetOp(*desc.Proto());
validator.Execute(1);
}
} // namespace anakin } // namespace anakin
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
......
...@@ -27,13 +27,13 @@ TEST(flatten_op, test) { ...@@ -27,13 +27,13 @@ TEST(flatten_op, test) {
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
framework::Scope scope; framework::Scope scope;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, scope);
validator.DeclInputVar("flatten-X", {3, 100, 100, 4}); validator.DeclInputVar("flatten-X", {3, 10, 10, 4});
validator.DeclOutputVar("flatten-Out", {1, 300, 400, 1}); validator.DeclOutputVar("flatten-Out", {3, 400, 1, 1});
framework::OpDesc desc; framework::OpDesc desc;
desc.SetType("flatten"); desc.SetType("flatten");
desc.SetInput("X", {"flatten-X"}); desc.SetInput("X", {"flatten-X"});
desc.SetOutput("Out", {"flatten-Out"}); desc.SetOutput("Out", {"flatten-Out"});
desc.SetAttr("axis", 2); desc.SetAttr("axis", 1);
LOG(INFO) << "set OP"; LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto()); validator.SetOp(*desc.Proto());
......
...@@ -45,6 +45,27 @@ TEST(reshape, test) { ...@@ -45,6 +45,27 @@ TEST(reshape, test) {
validator.Execute(1); validator.Execute(1);
} }
TEST(reshape, test2) {
framework::Scope scope;
std::unordered_set<std::string> parameters;
AnakinConvertValidation validator(parameters, scope);
validator.DeclInputVar("reshape-X", {1, 2, 4});
validator.DeclOutputVar("reshape-Out", {1, 4, 2});
framework::OpDesc desc;
desc.SetType("reshape");
desc.SetInput("X", {"reshape-X"});
desc.SetOutput("Out", {"reshape-Out"});
// desc.SetAttr("shape", std::vector<int>({3, 2, 1, 3}));
desc.SetAttr("shape", std::vector<int>({0, -1, 2}));
LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(1);
}
} // namespace anakin } // namespace anakin
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
......
...@@ -27,9 +27,8 @@ TEST(softmax, test) { ...@@ -27,9 +27,8 @@ TEST(softmax, test) {
std::unordered_set<std::string> parameters; std::unordered_set<std::string> parameters;
AnakinConvertValidation validator(parameters, scope); AnakinConvertValidation validator(parameters, scope);
std::vector<int> tensor_shape{8, 10}; validator.DeclInputVar("softmax-X", {1, 10});
validator.DeclInputVar("softmax-X", {1, 10, 1, 1}); validator.DeclOutputVar("softmax-Out", {1, 10});
validator.DeclOutputVar("softmax-Out", {1, 10, 1, 1});
framework::OpDesc desc; framework::OpDesc desc;
desc.SetType("softmax"); desc.SetType("softmax");
......
...@@ -43,6 +43,28 @@ TEST(transpose_op, test) { ...@@ -43,6 +43,28 @@ TEST(transpose_op, test) {
validator.Execute(3); validator.Execute(3);
} }
// test input shape's dims < 4
TEST(transpose_op, test2) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation validator(parameters, scope);
validator.DeclInputVar("transpose-X", {3, 4, 5});
validator.DeclOutputVar("transpose-Out", {3, 5, 4});
// Prepare Op description
framework::OpDesc desc;
desc.SetType("transpose");
desc.SetInput("X", {"transpose-X"});
desc.SetOutput("Out", {"transpose-Out"});
desc.SetAttr("axis", std::vector<int>({0, 2, 1}));
LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(1);
}
} // namespace anakin } // namespace anakin
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
......
...@@ -40,6 +40,11 @@ void TransposeOpConverter::operator()(const framework::proto::OpDesc &op, ...@@ -40,6 +40,11 @@ void TransposeOpConverter::operator()(const framework::proto::OpDesc &op,
engine_->AddOp(op_name, "Permute", {input}, {output}); engine_->AddOp(op_name, "Permute", {input}, {output});
auto axis = boost::get<std::vector<int>>(op_desc.GetAttr("axis")); auto axis = boost::get<std::vector<int>>(op_desc.GetAttr("axis"));
size_t axis_size = axis.size();
while (axis.size() < 4) {
axis.push_back(axis_size);
axis_size += 1;
}
engine_->AddOpAttr<PTuple<int>>(op_name, "dims", axis); engine_->AddOpAttr<PTuple<int>>(op_name, "dims", axis);
} }
......
...@@ -127,6 +127,9 @@ class AnakinConvertValidation { ...@@ -127,6 +127,9 @@ class AnakinConvertValidation {
auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(scope_, auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(scope_,
input); input);
auto t_shape = framework::vectorize2int(t.dims()); auto t_shape = framework::vectorize2int(t.dims());
while (t_shape.size() < 4) {
t_shape.push_back(1);
}
engine_->SetInputShape(input, t_shape); engine_->SetInputShape(input, t_shape);
} }
engine_->Optimize(); engine_->Optimize();
......
...@@ -21,7 +21,7 @@ namespace anakin { ...@@ -21,7 +21,7 @@ namespace anakin {
// Just tell by the op_types. // Just tell by the op_types.
struct SimpleOpTypeSetTeller : public Teller { struct SimpleOpTypeSetTeller : public Teller {
SimpleOpTypeSetTeller() { SimpleOpTypeSetTeller() {
// teller_set.insert("mul"); teller_set.insert("mul");
teller_set.insert("fc"); teller_set.insert("fc");
teller_set.insert("conv2d_fusion"); teller_set.insert("conv2d_fusion");
teller_set.insert("split"); teller_set.insert("split");
...@@ -30,7 +30,14 @@ struct SimpleOpTypeSetTeller : public Teller { ...@@ -30,7 +30,14 @@ struct SimpleOpTypeSetTeller : public Teller {
teller_set.insert("elementwise_add"); teller_set.insert("elementwise_add");
teller_set.insert("concat"); teller_set.insert("concat");
teller_set.insert("tanh"); teller_set.insert("tanh");
// teller_set.insert("conv2d"); teller_set.insert("conv2d");
teller_set.insert("batch_norm");
teller_set.insert("softmax");
teller_set.insert("flatten2");
teller_set.insert("reshape2");
teller_set.insert("transpose2");
teller_set.insert("density_prior_box");
teller_set.insert("detection_out");
} }
bool operator()(const std::string& op_type, bool operator()(const std::string& op_type,
......
...@@ -45,7 +45,7 @@ std::unique_ptr<framework::ir::Graph> analysis::AnakinSubgraphPass::ApplyImpl( ...@@ -45,7 +45,7 @@ std::unique_ptr<framework::ir::Graph> analysis::AnakinSubgraphPass::ApplyImpl(
return anakin::OpTeller::Global().Tell(node->Op()->Type(), *node->Op()); return anakin::OpTeller::Global().Tell(node->Op()->Type(), *node->Op());
}; };
SubGraphFuser fuser(graph.get(), teller, 0); SubGraphFuser fuser(graph.get(), teller, 3 /* min_subgraph_size */);
fuser(); fuser();
for (auto *node : graph->Nodes()) { for (auto *node : graph->Nodes()) {
......
...@@ -64,3 +64,8 @@ if (WITH_ANAKIN AND WITH_MKL) # only needed in CI ...@@ -64,3 +64,8 @@ if (WITH_ANAKIN AND WITH_MKL) # only needed in CI
anakin_target(inference_anakin_api) anakin_target(inference_anakin_api)
anakin_target(inference_anakin_api_shared) anakin_target(inference_anakin_api_shared)
endif() endif()
if (WITH_ANAKIN_SUBGRAPH)
inference_analysis_test(test_anakin_model SRCS mobilenet_test.cc EXTRA_DEPS paddle_fluid)
inference_analysis_test(anakin_conv_model SRCS conv_anakin_test.cc EXTRA_DEPS paddle_fluid)
inference_analysis_test(life_feature_test SRCS life_feature_test.cc EXTRA_DEPS paddle_fluid)
endif()
...@@ -808,13 +808,22 @@ USE_TRT_CONVERTER(conv2d_transpose); ...@@ -808,13 +808,22 @@ USE_TRT_CONVERTER(conv2d_transpose);
USE_TRT_CONVERTER(leaky_relu); USE_TRT_CONVERTER(leaky_relu);
#endif #endif
USE_ANAKIN_CONVERTER(mul);
USE_ANAKIN_CONVERTER(fc); USE_ANAKIN_CONVERTER(fc);
USE_ANAKIN_CONVERTER(conv2d); USE_ANAKIN_CONVERTER(conv2d);
USE_ANAKIN_CONVERTER(conv2d_fusion);
USE_ANAKIN_CONVERTER(concat); USE_ANAKIN_CONVERTER(concat);
USE_ANAKIN_CONVERTER(split); USE_ANAKIN_CONVERTER(split);
USE_ANAKIN_CONVERTER(relu); USE_ANAKIN_CONVERTER(relu);
USE_ANAKIN_CONVERTER(sigmoid); USE_ANAKIN_CONVERTER(sigmoid);
USE_ANAKIN_CONVERTER(tanh); USE_ANAKIN_CONVERTER(tanh);
USE_ANAKIN_CONVERTER(pool2d); USE_ANAKIN_CONVERTER(pool2d);
USE_ANAKIN_CONVERTER(conv2d_fusion);
USE_ANAKIN_CONVERTER(elementwise_add); USE_ANAKIN_CONVERTER(elementwise_add);
USE_ANAKIN_CONVERTER(batch_norm);
USE_ANAKIN_CONVERTER(flatten);
USE_ANAKIN_CONVERTER(reshape);
USE_ANAKIN_CONVERTER(transpose);
USE_ANAKIN_CONVERTER(softmax);
USE_ANAKIN_CONVERTER(detection_out);
USE_ANAKIN_CONVERTER(density_prior_box);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册