diff --git a/CMakeLists.txt b/CMakeLists.txt index 6bb0e5f51f4bee20905016579a99715859ab37c5..a38e32b73d51f142a1a1379541ffbd922662561a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,6 +64,7 @@ option(WITH_DISTRIBUTE "Compile with distributed support" OFF) option(WITH_PSLIB "Compile with pslib support" OFF) option(WITH_CONTRIB "Compile the third-party contributation" OFF) option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF) +# TODO(Superjomn) Remove WITH_ANAKIN option if not needed latter. option(WITH_ANAKIN "Compile with Anakin library" OFF) option(ANAKIN_BUILD_FAT_BIN "Build anakin cuda fat-bin lib for all device plantform, ignored when WITH_ANAKIN=OFF" OFF) option(ANAKIN_BUILD_CROSS_PLANTFORM "Build anakin lib for any nvidia device plantform. ignored when WITH_ANAKIN=OFF" ON) @@ -190,6 +191,7 @@ include(configure) # add paddle env configuration if(WITH_GPU) include(cuda) include(tensorrt) + include(anakin_subgraph) endif() if(WITH_MKL OR WITH_MKLML) include(external/anakin) diff --git a/cmake/anakin_subgraph.cmake b/cmake/anakin_subgraph.cmake new file mode 100644 index 0000000000000000000000000000000000000000..4a7d32a63553df31e0928e7b30249ff3e809cba1 --- /dev/null +++ b/cmake/anakin_subgraph.cmake @@ -0,0 +1,32 @@ +if(NOT WITH_GPU) + return() +endif() + +set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT") +find_path(ANAKIN_INCLUDE_DIR anakin_config.h + PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/include + $ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/include + NO_DEFAULT_PATH +) + +find_library(ANAKIN_LIBRARY NAMES libanakin_saber_common.so libanakin.so + PATHS ${ANAKIN_ROOT} + $ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/lib + NO_DEFAULT_PATH + DOC "Path to ANAKIN library.") + +if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY) + if(WITH_DSO) + set(ANAKIN_FOUND ON) + endif(WITH_DSO) +else() + set(ANAKIN_FOUND OFF) +endif() + +if(ANAKIN_FOUND) + message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ") + include_directories(${ANAKIN_ROOT}/include) + include_directories(${ANAKIN_ROOT}/include/saber) + link_directories(${ANAKIN_ROOT}) + add_definitions(-DPADDLE_WITH_ANAKIN) +endif() diff --git a/cmake/tensorrt.cmake b/cmake/tensorrt.cmake index 891ff222633741f9894c2fdb6c0096a48f8a35e1..3bf12094e4c32e69f908cbe6cefc7871fc9bb568 100644 --- a/cmake/tensorrt.cmake +++ b/cmake/tensorrt.cmake @@ -33,5 +33,6 @@ if(TENSORRT_FOUND) message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. " "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ") include_directories(${TENSORRT_INCLUDE_DIR}) + link_directories(${TENSORRT_LIBRARY}) add_definitions(-DPADDLE_WITH_TENSORRT) endif() diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 8c7d4cf0e3ef618b7321345064836d8043a83811..81b8ffa83f612f5b67cd91a7a2c1228519a1fbb7 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -68,16 +68,22 @@ pass_library(transpose_flatten_concat_fuse_pass inference) pass_library(identity_scale_op_clean_pass base) pass_library(sync_batch_norm_pass base) pass_library(runtime_context_cache_pass base) +pass_library(simplify_anakin_detection_pattern_pass inference) +pass_library(anakin_fillconstant_elementwisemul_fuse inference) # There may be many transpose-flatten structures in a model, and the output of # these structures will be used as inputs to the concat Op. This pattern will # be detected by our pass. The index here represents the number of structures in the # pattern. We use index 3 ~ 6, because these quantities of structures are # common in the models. -foreach (index RANGE 3 6) +foreach (index RANGE 2 6) file(APPEND ${pass_file} "USE_PASS(transpose_flatten${index}_concat_fuse_pass);\n") endforeach() +foreach (index RANGE 2 6) + file(APPEND ${pass_file} "USE_PASS(simplify_anakin_detection_pattern_pass${index});\n") +endforeach() + if(WITH_MKLDNN) pass_library(mkldnn_placement_pass base mkldnn) pass_library(depthwise_conv_mkldnn_pass base mkldnn) diff --git a/paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.cc b/paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.cc new file mode 100644 index 0000000000000000000000000000000000000000..83b0da0c0118a856e54d744607cee8b421f330a3 --- /dev/null +++ b/paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h" +#include "paddle/fluid/framework/ir/graph_viz_pass.h" + +namespace paddle { +namespace framework { +namespace ir { + +#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern); +#define GET_NODES \ + GET_IR_NODE(fill_constant); \ + GET_IR_NODE(fill_constant_out); \ + GET_IR_NODE(elementwise_mul); \ + GET_IR_NODE(elementwise_mul_out); + +std::unique_ptr AnakinFillconstantElementwisemulFuse::ApplyImpl( + std::unique_ptr graph) const { + const std::string pattern_name = "anakin_fillconstant_elementwisemul_fuse"; + FusePassBase::Init(pattern_name, graph.get()); + + GraphPatternDetector gpd; + auto* x = gpd.mutable_pattern() + ->NewNode("x") + ->assert_is_op_input("elementwise_mul", "X") + ->AsInput(); + + patterns::AnakinFillConstantElementWiseMulFuse pattern(gpd.mutable_pattern(), + pattern_name); + pattern(x); + + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* g) { + GET_NODES; + + PADDLE_ENFORCE(subgraph.count(x)); + auto* elementwise_in = subgraph.at(x); + float constant_value = + boost::get(fill_constant->Op()->GetAttr("value")); + + framework::OpDesc new_op_desc; + new_op_desc.SetType("scale"); + new_op_desc.SetInput("X", {elementwise_in->Name()}); + new_op_desc.SetAttr("scale", constant_value); + new_op_desc.SetAttr("bias", static_cast(0.0)); + new_op_desc.SetAttr("bias_after_scale", true); + new_op_desc.SetOutput("Out", {elementwise_mul_out->Name()}); + new_op_desc.Flush(); + + // Create a new node for the fused op. + auto* scale_op = graph->CreateOpNode(&new_op_desc); + + IR_NODE_LINK_TO(elementwise_in, scale_op); // Input + IR_NODE_LINK_TO(scale_op, elementwise_mul_out); // Output + + // Delete the unneeded nodes. + GraphSafeRemoveNodes(graph.get(), + {fill_constant, fill_constant_out, elementwise_mul}); + }; + + gpd(graph.get(), handler); + return graph; +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(anakin_fillconstant_elementwisemul_fuse, + paddle::framework::ir::AnakinFillconstantElementwisemulFuse); diff --git a/paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h b/paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h new file mode 100644 index 0000000000000000000000000000000000000000..fa95143d3adae3e3eeb913af09986fb4a401bd73 --- /dev/null +++ b/paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h @@ -0,0 +1,35 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" + +namespace paddle { +namespace framework { +namespace ir { + +class AnakinFillconstantElementwisemulFuse : public FusePassBase { + public: + virtual ~AnakinFillconstantElementwisemulFuse() {} + + protected: + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index d0d72127f08f4a83cca5daed57ae6d72c33ae1e3..555fdc7b7a03ebc99fcc77a26341d291dac2c308 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -1470,6 +1470,171 @@ PDNode *patterns::TransposeFlattenConcat::operator()( return concat_out; } +PDNode *patterns::AnakinDetectionPattern::operator()( + std::vector conv_in, int times) { + // The times represents the repeat times of the + // {prior_box, prior_box_loc_out, flatten, prior_box_var_out, reshape} + const int kNumFields = 7; + const int kPriorBoxLocOffset = 1; + const int kReshape1Offset = 2; + const int kReshape1OutOffset = 3; + const int kPriorBoxVarOffset = 4; + const int kReshape2Offset = 5; + const int kReshape2OutOffset = 6; + + const int kBoxCoderThirdInputOffset = times; + const int kMultiClassSecondInputNmsOffset = times + 1; + + std::vector nodes; + + for (int i = 0; i < times; i++) { + nodes.push_back( + pattern->NewNode(GetNodeName("prior_box" + std::to_string(i))) + ->assert_is_op("density_prior_box")); + nodes.push_back(pattern->NewNode(GetNodeName("box_out" + std::to_string(i))) + ->assert_is_op_output("density_prior_box", "Boxes") + ->assert_is_op_input("reshape2", "X") + ->AsIntermediate()); + nodes.push_back( + pattern->NewNode(GetNodeName("reshape1" + std::to_string(i))) + ->assert_is_op("reshape2")); + + nodes.push_back( + pattern->NewNode(GetNodeName("reshape1_out" + std::to_string(i))) + ->assert_is_op_output("reshape2") + ->assert_is_op_nth_input("concat", "X", i) + ->AsIntermediate()); + + nodes.push_back( + pattern->NewNode(GetNodeName("box_var_out" + std::to_string(i))) + ->assert_is_op_output("density_prior_box", "Variances") + ->assert_is_op_input("reshape2", "X") + ->AsIntermediate()); + nodes.push_back( + pattern->NewNode(GetNodeName("reshape2" + std::to_string(i))) + ->assert_is_op("reshape2")); + + nodes.push_back( + pattern->NewNode(GetNodeName("reshape2_out" + std::to_string(i))) + ->assert_is_op_output("reshape2") + ->assert_is_op_nth_input("concat", "X", i) + ->AsIntermediate()); + } + + auto concat_op1 = pattern->NewNode(GetNodeName("concat1")) + ->assert_is_op("concat") + ->assert_op_has_n_inputs("concat", times); + auto concat_out1 = pattern->NewNode(GetNodeName("concat1_out")) + ->assert_is_op_output("concat") + ->AsIntermediate(); + + auto concat_op2 = pattern->NewNode(GetNodeName("concat2")) + ->assert_is_op("concat") + ->assert_op_has_n_inputs("concat", times); + auto concat_out2 = pattern->NewNode(GetNodeName("concat2_out")) + ->assert_is_op_output("concat") + ->AsIntermediate(); + + auto box_coder_op = pattern->NewNode(GetNodeName("box_coder")) + ->assert_is_op("box_coder") + ->assert_op_has_n_inputs("box_coder", 3); + + auto box_coder_out = pattern->NewNode(GetNodeName("box_coder_out")) + ->assert_is_op_output("box_coder") + ->AsIntermediate(); + + auto transpose_before_nms = + pattern->NewNode(GetNodeName("transpose_before_nms")) + ->assert_is_op("transpose2"); + + auto transpose_before_nms_out = + pattern->NewNode(GetNodeName("transpose_before_nms_out")) + ->assert_is_op_output("transpose2") + ->assert_is_op_input("multiclass_nms", "Scores") + ->AsIntermediate(); + + auto multiclass_nms_op = pattern->NewNode(GetNodeName("multiclass_nms")) + ->assert_is_op("multiclass_nms") + ->assert_op_has_n_inputs("multiclass_nms", 2); + + auto multiclass_nms_out = pattern->NewNode(GetNodeName("multiclass_nms_out")) + ->assert_is_op_output("multiclass_nms") + ->AsOutput(); + + std::vector reshape1_outs; + std::vector reshape2_outs; + + for (int i = 0; i < times; i++) { + conv_in[i]->AsInput(); + // prior_box + nodes[i * kNumFields]->LinksFrom({conv_in[i]}); + // prior_box box out + nodes[i * kNumFields + kPriorBoxLocOffset]->LinksFrom( + {nodes[i * kNumFields]}); + // reshape + nodes[i * kNumFields + kReshape1Offset]->LinksFrom( + {nodes[i * kNumFields + kPriorBoxLocOffset]}); + // reshape_out + nodes[i * kNumFields + kReshape1OutOffset]->LinksFrom( + {nodes[i * kNumFields + kReshape1Offset]}); + + nodes[i * kNumFields + kPriorBoxVarOffset]->LinksFrom( + {nodes[i * kNumFields]}); + // reshape + nodes[i * kNumFields + kReshape2Offset]->LinksFrom( + {nodes[i * kNumFields + kPriorBoxVarOffset]}); + // reshape_out + nodes[i * kNumFields + kReshape2OutOffset]->LinksFrom( + {nodes[i * kNumFields + kReshape2Offset]}); + + reshape1_outs.push_back(nodes[i * kNumFields + kReshape1OutOffset]); + reshape2_outs.push_back(nodes[i * kNumFields + kReshape2OutOffset]); + } + + concat_op1->LinksFrom(reshape1_outs); + concat_op2->LinksFrom(reshape2_outs); + concat_out1->LinksFrom({concat_op1}); + concat_out2->LinksFrom({concat_op2}); + + conv_in[kBoxCoderThirdInputOffset]->AsInput(); + conv_in[kMultiClassSecondInputNmsOffset]->AsInput(); + + box_coder_op->LinksFrom( + {concat_out1, concat_out2, conv_in[kBoxCoderThirdInputOffset]}); + box_coder_out->LinksFrom({box_coder_op}); + + transpose_before_nms->LinksFrom({conv_in[kMultiClassSecondInputNmsOffset]}); + transpose_before_nms_out->LinksFrom({transpose_before_nms}); + + multiclass_nms_op->LinksFrom({box_coder_out, transpose_before_nms_out}) + .LinksTo({multiclass_nms_out}); + + return multiclass_nms_out; +} + +PDNode *patterns::AnakinFillConstantElementWiseMulFuse::operator()( + PDNode *elementwise_op_input) { + auto fill_constant = + pattern->NewNode(fill_constant_repr())->assert_is_op("fill_constant"); + + auto fill_constant_out = pattern->NewNode(fill_constant_out_repr()) + ->assert_is_op_output("fill_constant") + ->assert_is_op_input("elementwise_mul", "Y") + ->AsIntermediate(); + + auto elementwise_mul_op = + pattern->NewNode(elementwise_mul_repr())->assert_is_op("elementwise_mul"); + + auto elementwise_mul_out = pattern->NewNode(elementwise_mul_out_repr()) + ->assert_is_op_output("elementwise_mul") + ->AsOutput(); + + fill_constant_out->LinksFrom({fill_constant}); + elementwise_mul_op->LinksFrom({elementwise_op_input, fill_constant_out}); + elementwise_mul_out->LinksFrom({elementwise_mul_op}); + return elementwise_mul_out; +} + } // namespace ir } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index bac23b651305419a5bcc4fc1efacb721e6e5d0ad..130ddeac4cd1a38516540d175e17d46f877bd909 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -844,6 +844,36 @@ struct TransposeFlattenConcat : public PatternBase { } }; +struct AnakinDetectionPattern : public PatternBase { + AnakinDetectionPattern(PDPattern* pattern, const std::string& name_scope) + : PatternBase(pattern, name_scope, "anakin_detect_pattern") {} + + PDNode* operator()(std::vector conv_inputs, int times); + + std::string GetNodeName(const std::string& op_type) { + return PDNodeName(name_scope_, repr_, id_, op_type); + } + + PDNode* GetPDNode(const std::string& op_type) { + return pattern->RetrieveNode(GetNodeName(op_type)); + } +}; + +struct AnakinFillConstantElementWiseMulFuse : public PatternBase { + AnakinFillConstantElementWiseMulFuse(PDPattern* pattern, + const std::string& name_scope) + : PatternBase(pattern, name_scope, + "anakin_fillconstant_elementwisemul_fuse") {} + + PDNode* operator()(PDNode* elementwise_op_input); + + // declare operator node's name + PATTERN_DECL_NODE(fill_constant); + PATTERN_DECL_NODE(fill_constant_out); + PATTERN_DECL_NODE(elementwise_mul); + PATTERN_DECL_NODE(elementwise_mul_out); +}; + } // namespace patterns // Link two ir::Nodes from each other. diff --git a/paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.cc b/paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..84fb8063e6f020d5ada2c6af7a0307360aa1c92c --- /dev/null +++ b/paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.cc @@ -0,0 +1,244 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "paddle/fluid/framework/ir/graph_viz_pass.h" +#include "paddle/fluid/framework/ir/node.h" +#include "paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.h" + +namespace paddle { +namespace framework { +namespace ir { + +template +std::unique_ptr SimplifyAnakinDetectionPatternPass::ApplyImpl( + std::unique_ptr graph) const { + const std::string pattern_name = + "simplify_anakin_detection_pattern_pass" + std::to_string(times); + FusePassBase::Init(pattern_name, graph.get()); + + GraphPatternDetector gpd; + std::vector input_nodes; + for (int i = 0; i < times; i++) { + input_nodes.push_back(gpd.mutable_pattern() + ->NewNode("x" + std::to_string(i)) + ->assert_is_op_input("density_prior_box", "Input") + ->AsInput()); + } + input_nodes.push_back(gpd.mutable_pattern() + ->NewNode("x" + std::to_string(times)) + ->assert_is_op_input("box_coder", "TargetBox") + ->AsInput()); + + input_nodes.push_back(gpd.mutable_pattern() + ->NewNode("x" + std::to_string(times + 1)) + ->assert_is_op_input("transpose2") + ->AsInput()); + + patterns::AnakinDetectionPattern pattern(gpd.mutable_pattern(), pattern_name); + pattern(input_nodes, times); + + auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph, + Graph *g) { + const int kNumFields = 7; + const int kPriorBoxLocOffset = 1; + const int kReshape1Offset = 2; + const int kReshape1OutOffset = 3; + const int kPriorBoxVarOffset = 4; + const int kReshape2Offset = 5; + const int kReshape2OutOffset = 6; + std::vector nodes; + + for (int i = 0; i < times; i++) { + PADDLE_ENFORCE( + subgraph.at(pattern.GetPDNode("prior_box" + std::to_string(i)))); + PADDLE_ENFORCE( + subgraph.at(pattern.GetPDNode("box_out" + std::to_string(i)))); + PADDLE_ENFORCE( + subgraph.at(pattern.GetPDNode("reshape1" + std::to_string(i)))); + PADDLE_ENFORCE( + subgraph.at(pattern.GetPDNode("reshape1_out" + std::to_string(i)))); + PADDLE_ENFORCE( + subgraph.at(pattern.GetPDNode("reshape2" + std::to_string(i)))); + PADDLE_ENFORCE( + subgraph.at(pattern.GetPDNode("reshape2_out" + std::to_string(i)))); + + PADDLE_ENFORCE( + subgraph.at(pattern.GetPDNode("box_var_out" + std::to_string(i)))); + + nodes.push_back( + subgraph.at(pattern.GetPDNode("prior_box" + std::to_string(i)))); + nodes.push_back( + subgraph.at(pattern.GetPDNode("box_out" + std::to_string(i)))); + nodes.push_back( + subgraph.at(pattern.GetPDNode("reshape1" + std::to_string(i)))); + nodes.push_back( + subgraph.at(pattern.GetPDNode("reshape1_out" + std::to_string(i)))); + nodes.push_back( + subgraph.at(pattern.GetPDNode("box_var_out" + std::to_string(i)))); + nodes.push_back( + subgraph.at(pattern.GetPDNode("reshape2" + std::to_string(i)))); + nodes.push_back( + subgraph.at(pattern.GetPDNode("reshape2_out" + std::to_string(i)))); + } + + Node *concat_op1 = subgraph.at(pattern.GetPDNode("concat1")); + Node *concat_out1 = subgraph.at(pattern.GetPDNode("concat1_out")); + + Node *concat_op2 = subgraph.at(pattern.GetPDNode("concat2")); + Node *concat_out2 = subgraph.at(pattern.GetPDNode("concat2_out")); + + Node *box_coder_third_input = subgraph.at(input_nodes[times]); + Node *box_coder_op = subgraph.at(pattern.GetPDNode("box_coder")); + Node *box_coder_out = subgraph.at(pattern.GetPDNode("box_coder_out")); + + Node *multiclass_nms_second_input = subgraph.at(input_nodes[times + 1]); + Node *transpose_before_nms = + subgraph.at(pattern.GetPDNode("transpose_before_nms")); + Node *transpose_before_nms_out = + subgraph.at(pattern.GetPDNode("transpose_before_nms_out")); + + Node *multiclass_nms = subgraph.at(pattern.GetPDNode("multiclass_nms")); + Node *multiclass_nms_out = + subgraph.at(pattern.GetPDNode("multiclass_nms_out")); + + std::string code_type = + boost::get(box_coder_op->Op()->GetAttr("code_type")); + bool box_normalized = + boost::get(box_coder_op->Op()->GetAttr("box_normalized")); + // auto variance = + // boost::get>(box_coder_op->Op()->GetAttr("variance")); + int background_label = + boost::get(multiclass_nms->Op()->GetAttr("background_label")); + float score_threshold = + boost::get(multiclass_nms->Op()->GetAttr("score_threshold")); + int nms_top_k = boost::get(multiclass_nms->Op()->GetAttr("nms_top_k")); + float nms_threshold = + boost::get(multiclass_nms->Op()->GetAttr("nms_threshold")); + float nms_eta = boost::get(multiclass_nms->Op()->GetAttr("nms_eta")); + int keep_top_k = + boost::get(multiclass_nms->Op()->GetAttr("keep_top_k")); + + std::vector concat1_input_names; + for (int i = 0; i < times; i++) { + concat1_input_names.push_back( + nodes[i * kNumFields + kPriorBoxLocOffset]->Name()); + } + + // int axis = boost::get(concat_op1->Op()->GetAttr("axis")); + framework::OpDesc concat1_desc; + concat1_desc.SetType("concat"); + concat1_desc.SetInput("X", concat1_input_names); + concat1_desc.SetAttr("axis", 2); + concat1_desc.SetOutput("Out", {concat_out1->Name()}); + + auto *new_add_concat_op = graph->CreateOpNode(&concat1_desc); + + for (int i = 0; i < times; i++) { + nodes[i * kNumFields + kPriorBoxLocOffset]->outputs.push_back( + new_add_concat_op); + new_add_concat_op->inputs.push_back( + nodes[i * kNumFields + kPriorBoxLocOffset]); + } + + framework::OpDesc new_op_desc; + new_op_desc.SetType("detection_out"); + new_op_desc.SetInput("PriorBox", {concat_out1->Name()}); + new_op_desc.SetInput("TargetBox", {box_coder_third_input->Name()}); + new_op_desc.SetInput("Scores", {multiclass_nms_second_input->Name()}); + new_op_desc.SetAttr("code_type", code_type); + new_op_desc.SetAttr("box_normalized", box_normalized); + new_op_desc.SetAttr("background_label", background_label); + new_op_desc.SetAttr("score_threshold", score_threshold); + new_op_desc.SetAttr("nms_top_k", nms_top_k); + new_op_desc.SetAttr("nms_threshold", nms_threshold); + new_op_desc.SetAttr("nms_eta", nms_eta); + new_op_desc.SetAttr("keep_top_k", keep_top_k); + new_op_desc.SetOutput("Out", {multiclass_nms_out->Name()}); + new_op_desc.Flush(); + + // Create a new node for the fused op. + auto *detection_out_op = graph->CreateOpNode(&new_op_desc); + + std::unordered_set delete_nodes; + + for (int i = 0; i < times; i++) { + nodes[i * kNumFields + kPriorBoxLocOffset]->outputs.push_back(concat_op1); + delete_nodes.insert(nodes[i * kNumFields + kReshape1Offset]); + delete_nodes.insert(nodes[i * kNumFields + kReshape1OutOffset]); + delete_nodes.insert(nodes[i * kNumFields + kPriorBoxVarOffset]); + delete_nodes.insert(nodes[i * kNumFields + kReshape2Offset]); + delete_nodes.insert(nodes[i * kNumFields + kReshape2OutOffset]); + } + + delete_nodes.insert(concat_op1); + delete_nodes.insert(concat_op2); + delete_nodes.insert(concat_out2); + delete_nodes.insert(box_coder_op); + delete_nodes.insert(box_coder_out); + delete_nodes.insert(transpose_before_nms); + delete_nodes.insert(transpose_before_nms_out); + delete_nodes.insert(multiclass_nms); + + new_add_concat_op->outputs.push_back(concat_out1); + concat_out1->inputs.push_back(new_add_concat_op); + + detection_out_op->inputs.push_back(concat_out1); + detection_out_op->inputs.push_back(box_coder_third_input); + detection_out_op->inputs.push_back(multiclass_nms_second_input); + detection_out_op->outputs.push_back(multiclass_nms_out); + + concat_out1->outputs.push_back(detection_out_op); + box_coder_third_input->outputs.push_back(detection_out_op); + multiclass_nms_second_input->outputs.push_back(detection_out_op); + multiclass_nms_out->inputs.push_back(detection_out_op); + + // Delete the unneeded nodes. + GraphSafeRemoveNodes(graph.get(), delete_nodes); + }; + + gpd(graph.get(), handler); + return graph; +} + +template class SimplifyAnakinDetectionPatternPass<1>; +template class SimplifyAnakinDetectionPatternPass<2>; +template class SimplifyAnakinDetectionPatternPass<3>; +template class SimplifyAnakinDetectionPatternPass<4>; +template class SimplifyAnakinDetectionPatternPass<5>; +template class SimplifyAnakinDetectionPatternPass<6>; + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(simplify_anakin_detection_pattern_pass, + paddle::framework::ir::SimplifyAnakinDetectionPatternPass<1>); + +REGISTER_PASS(simplify_anakin_detection_pattern_pass2, + paddle::framework::ir::SimplifyAnakinDetectionPatternPass<2>); + +REGISTER_PASS(simplify_anakin_detection_pattern_pass3, + paddle::framework::ir::SimplifyAnakinDetectionPatternPass<3>); + +REGISTER_PASS(simplify_anakin_detection_pattern_pass4, + paddle::framework::ir::SimplifyAnakinDetectionPatternPass<4>); + +REGISTER_PASS(simplify_anakin_detection_pattern_pass5, + paddle::framework::ir::SimplifyAnakinDetectionPatternPass<5>); + +REGISTER_PASS(simplify_anakin_detection_pattern_pass6, + paddle::framework::ir::SimplifyAnakinDetectionPatternPass<6>); diff --git a/paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.h b/paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..2338e4c38b253e2110addcca494e1cae5b58beaf --- /dev/null +++ b/paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.h @@ -0,0 +1,41 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" + +namespace paddle { +namespace framework { +namespace ir { + +// There may be many transpose-flatten structures in a model, and the output of +// these structures will be used as inputs to the concat Op. This pattern will +// be detected by our pass. The times here represents the repeat times of this +// structure. +template +class SimplifyAnakinDetectionPatternPass : public FusePassBase { + public: + virtual ~SimplifyAnakinDetectionPatternPass() {} + + protected: + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc index fda43948d567689103815e3ad7ba285719dae80f..cab69c408defadad32eba83e47d18f0f82ccc771 100644 --- a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc +++ b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc @@ -12,7 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include +#include #include #include "paddle/fluid/framework/ir/graph_viz_pass.h" @@ -123,6 +125,7 @@ std::unique_ptr TransposeFlattenConcatFusePass::ApplyImpl( } template class TransposeFlattenConcatFusePass<1>; +template class TransposeFlattenConcatFusePass<2>; template class TransposeFlattenConcatFusePass<3>; template class TransposeFlattenConcatFusePass<4>; template class TransposeFlattenConcatFusePass<5>; @@ -135,6 +138,9 @@ template class TransposeFlattenConcatFusePass<6>; REGISTER_PASS(transpose_flatten_concat_fuse_pass, paddle::framework::ir::TransposeFlattenConcatFusePass<1>); +REGISTER_PASS(transpose_flatten2_concat_fuse_pass, + paddle::framework::ir::TransposeFlattenConcatFusePass<2>); + REGISTER_PASS(transpose_flatten3_concat_fuse_pass, paddle::framework::ir::TransposeFlattenConcatFusePass<3>); diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index d27ef8fe3c33f0b293671a4fdac9e574cb92c806..4cd29486a8e846dc04fbc4e467f2c40782408dfa 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -16,7 +16,10 @@ add_subdirectory(utils) if (TENSORRT_FOUND) add_subdirectory(tensorrt) endif() -# add_subdirectory(anakin) + +if (ANAKIN_FOUND) + add_subdirectory(anakin) +endif() get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES) diff --git a/paddle/fluid/inference/anakin/CMakeLists.txt b/paddle/fluid/inference/anakin/CMakeLists.txt index b418af62f8cae4513bcca24f057d1fe100bbea25..e8fb56590563f49f920bfe71d160ec822cb3ca30 100644 --- a/paddle/fluid/inference/anakin/CMakeLists.txt +++ b/paddle/fluid/inference/anakin/CMakeLists.txt @@ -1,4 +1,5 @@ -cc_library(anakin_engine SRCS engine.cc) +cc_library(anakin_engine SRCS engine.cc DEPS framework_proto) +cc_library(anakin_op_teller SRCS op_teller.cc DEPS framework_proto) target_link_libraries(anakin_engine anakin anakin_saber_common) cc_test(test_anakin_engine SRCS test_anakin_engine.cc DEPS anakin_engine) add_subdirectory(convert) diff --git a/paddle/fluid/inference/anakin/convert/CMakeLists.txt b/paddle/fluid/inference/anakin/convert/CMakeLists.txt index f5bfee861f14877b5a67bc48aeb14b8213a27370..1e7f5ac799de0d7a1debec0529d262f021bba790 100644 --- a/paddle/fluid/inference/anakin/convert/CMakeLists.txt +++ b/paddle/fluid/inference/anakin/convert/CMakeLists.txt @@ -1,2 +1,19 @@ -cc_library(anakin_op_converter SRCS fc.cc registrar.cc DEPS anakin_engine framework_proto scope) -cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op) +cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc + elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc DEPS anakin_engine framework_proto scope op_registry) + +cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op SERIAL) +cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv SERIAL) +cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter SERIAL) +cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling SERIAL) +cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split SERIAL) +cc_test(test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split SERIAL) +cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS anakin_op_converter elementwise_add_op elementwise_mul_op SERIAL) +cc_test(test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter SERIAL SERIAL) +cc_test(test_anakin_softmax SRCS test_softmax_op.cc DEPS anakin_op_converter softmax_op softmax SERIAL) +cc_test(test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter reshape_op SERIAL) +cc_test(test_anakin_flatten SRCS test_flatten_op.cc DEPS anakin_op_converter flatten_op reshape_op SERIAL) +cc_test(test_anakin_transpose SRCS test_transpose_op.cc DEPS anakin_op_converter transpose_op SERIAL) +cc_test(test_anakin_batch_norm SRCS test_batch_norm_op.cc DEPS anakin_op_converter batch_norm_op SERIAL) +cc_test(test_anakin_dropout SRCS test_dropout_op.cc DEPS anakin_op_converter dropout_op SERIAL) +#cc_test(test_anakin_im2sequence SRCS test_im2sequence_op.cc DEPS anakin_op_converter im2sequence_op im2col) +cc_test(test_anakin_sum SRCS test_sum_op.cc DEPS anakin_op_converter sum_op selected_rows_functor SERIAL) diff --git a/paddle/fluid/inference/anakin/convert/activation.cc b/paddle/fluid/inference/anakin/convert/activation.cc new file mode 100644 index 0000000000000000000000000000000000000000..c85b958d7b85cb3e21df8714c89eee10b9b3fecc --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/activation.cc @@ -0,0 +1,55 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/activation.h" +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::saber::NV; +using anakin::saber::Shape; + +namespace paddle { +namespace inference { +namespace anakin { + +ActivationOpConverter::ActivationOpConverter(const std::string &op_type) + : op_type_(op_type) { + auto it = anakin_op_types_.find(op_type_); + PADDLE_ENFORCE(it != anakin_op_types_.end(), + "activation op type is not support"); + anakin_op_type_ = it->second; +} + +void ActivationOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + auto input_name = op_desc.Input("X").front(); + auto output_name = op_desc.Output("Out").front(); + engine_->AddOp(op_name, "Activation", {input_name}, {output_name}); + engine_->AddOpAttr(op_name, "type", anakin_op_type_); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter); +REGISTER_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/activation.h b/paddle/fluid/inference/anakin/convert/activation.h new file mode 100644 index 0000000000000000000000000000000000000000..49a4518bef418491a7fbc0bcde403bf047f774bd --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/activation.h @@ -0,0 +1,52 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class ActivationOpConverter : public AnakinOpConverter { + public: + explicit ActivationOpConverter(const std::string &op_type); + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~ActivationOpConverter() {} + + private: + std::string op_type_; + std::string anakin_op_type_; + std::map anakin_op_types_{{"tanh", "TanH"}, + {"sigmoid", "Sigmoid"}}; +}; + +class TanhOpConverter : public ActivationOpConverter { + public: + TanhOpConverter() : ActivationOpConverter("tanh") {} +}; + +class SigmoidOpConverter : public ActivationOpConverter { + public: + SigmoidOpConverter() : ActivationOpConverter("sigmoid") {} +}; +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/batch_norm.cc b/paddle/fluid/inference/anakin/convert/batch_norm.cc new file mode 100644 index 0000000000000000000000000000000000000000..94014802bdbe1792e9eaba28d7134624dd3edc90 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/batch_norm.cc @@ -0,0 +1,127 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/batch_norm.h" +#include +#include +#include +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::saber::NV; +using anakin::saber::Shape; + +namespace paddle { +namespace inference { +namespace anakin { + +void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1); + std::map inputs; + for (auto k : {"X", "Scale", "Bias", "Mean", "Variance"}) { + PADDLE_ENFORCE_EQ(op_desc.Input(k).size(), 1UL); + auto v = op_desc.Input(k).front(); + inputs.insert({k, v}); + } + + auto output = op_desc.Output("Y").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Y").front(); + auto epsilon = boost::get(op_desc.GetAttr("epsilon")); + // auto momentum = boost::get(op_desc.GetAttr("momentum")); + + auto bn_op_name = op_name + ":bn"; + auto bn_output = bn_op_name + "_output"; + engine_->AddOp(bn_op_name, "BatchNorm", {inputs["X"]}, {bn_output}); + engine_->AddOpAttr(bn_op_name, "epsilon", epsilon); + engine_->AddOpAttr(bn_op_name, "momentum", static_cast(1.0)); + + auto scale_op_name = op_name + ":scale"; + auto get_lod_tensor = [this, &scope, &op_name](const std::string &var_name, + framework::LoDTensor *tensor) { + auto *v = scope.FindVar(var_name); + PADDLE_ENFORCE_NOT_NULL(v); + auto *t = v->GetMutable(); + tensor->Resize(t->dims()); + TensorCopySync(*t, platform::CPUPlace(), tensor); + }; + + framework::LoDTensor bias_t; + framework::LoDTensor mean_t; + framework::LoDTensor scale_t; + framework::LoDTensor variance_t; + get_lod_tensor(inputs["Bias"], &bias_t); + get_lod_tensor(inputs["Mean"], &mean_t); + get_lod_tensor(inputs["Scale"], &scale_t); + get_lod_tensor(inputs["Variance"], &variance_t); + + auto fill_shape = [](size_t n, std::vector shape) { + shape.insert(shape.begin(), 1); + if (shape.size() < n) { + shape.insert(shape.end(), n - shape.size(), 1); + } + return shape; + }; + Shape shape1(fill_shape(4, framework::vectorize2int(mean_t.dims()))); + Shape shape2(fill_shape(4, framework::vectorize2int(variance_t.dims()))); + auto *weight1 = + GraphGlobalMem::Global().template new_block(shape1); + auto *mean_data = static_cast(weight1->h_tensor().mutable_data()); + std::copy_n(mean_t.data(), mean_t.numel(), mean_data); + engine_->AddOpAttr(bn_op_name, "weight_1", *weight1); + + auto *weight2 = + GraphGlobalMem::Global().template new_block(shape2); + auto *variance_data = + static_cast(weight2->h_tensor().mutable_data()); + std::copy_n(variance_t.data(), variance_t.numel(), variance_data); + engine_->AddOpAttr(bn_op_name, "weight_2", *weight2); + + Shape shape3(std::vector({1, 1, 1, 1})); + auto *weight3 = + GraphGlobalMem::Global().template new_block(shape3); + auto *alpha_data = static_cast(weight3->h_tensor().mutable_data()); + float weight3_data[] = {1}; + std::copy(std::begin(weight3_data), std::end(weight3_data), alpha_data); + engine_->AddOpAttr(bn_op_name, "weight_3", *weight3); + + Shape scale_shape(fill_shape(4, framework::vectorize2int(scale_t.dims()))); + auto *scale = + GraphGlobalMem::Global().template new_block(scale_shape); + auto *scale_data = static_cast(scale->h_tensor().mutable_data()); + std::copy_n(scale_t.data(), scale_t.numel(), scale_data); + + Shape bias_shape(fill_shape(4, framework::vectorize2int(bias_t.dims()))); + auto *bias = + GraphGlobalMem::Global().template new_block(bias_shape); + auto *bias_data = static_cast(bias->h_tensor().mutable_data()); + std::copy_n(bias_t.data(), bias_t.numel(), bias_data); + + engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output}); + engine_->AddOpAttr(scale_op_name, "axis", 1); + engine_->AddOpAttr(scale_op_name, "num_axes", 1); + engine_->AddOpAttr(scale_op_name, "bias_term", true); + engine_->AddOpAttr(scale_op_name, "weight_1", *scale); + engine_->AddOpAttr(scale_op_name, "weight_2", *bias); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/batch_norm.h b/paddle/fluid/inference/anakin/convert/batch_norm.h new file mode 100644 index 0000000000000000000000000000000000000000..cee5c43ae76bf28284118380ca4c861d5cbedd1c --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/batch_norm.h @@ -0,0 +1,35 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class BatchNormOpConverter : public AnakinOpConverter { + public: + BatchNormOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~BatchNormOpConverter() {} +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/concat.cc b/paddle/fluid/inference/anakin/convert/concat.cc new file mode 100644 index 0000000000000000000000000000000000000000..e2d1111acbb60690167530a25aeaf59858b71987 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/concat.cc @@ -0,0 +1,51 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/concat.h" +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void ConcatOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + int axis = boost::get(op_desc.GetAttr("axis")); + auto input_names = op_desc.Input("X"); + // PADDLE_ENFORCE(axis > 0, + // "The axis attr of Concat op should be large than 0 for trt"); + + auto y_name = op_desc.Output("Out").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + + engine_->AddOp(op_name, "Concat", input_names, {y_name}); + engine_->AddOpAttr(op_name, "axis", axis); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/concat.h b/paddle/fluid/inference/anakin/convert/concat.h new file mode 100644 index 0000000000000000000000000000000000000000..4ff2b6d85b758efc7529c5034a34e094ee06cccb --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/concat.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class ConcatOpConverter : public AnakinOpConverter { + public: + ConcatOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~ConcatOpConverter() {} + + private: +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/conv2d.cc b/paddle/fluid/inference/anakin/convert/conv2d.cc new file mode 100644 index 0000000000000000000000000000000000000000..b99c6e71c4dfd2b567d85904f57ebecf0ed9a1cc --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/conv2d.cc @@ -0,0 +1,87 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/conv2d.h" +#include +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::saber::NV; +using anakin::saber::Shape; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void Conv2dOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL); + PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL); + PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL); + + auto input_name = op_desc.Input("Input").front(); + auto output_name = op_desc.Output("Output").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front(); + engine_->AddOp(op_name, "Convolution", {input_name}, {output_name}); + + auto *filter_v = scope.FindVar(op_desc.Input("Filter").front()); + PADDLE_ENFORCE_NOT_NULL(filter_v); + auto *filter_t = filter_v->GetMutable(); + std::unique_ptr weight_tensor( + new framework::LoDTensor()); + weight_tensor->Resize(filter_t->dims()); + TensorCopySync((*filter_t), platform::CPUPlace(), weight_tensor.get()); + + PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL); + + // const int n_output = weight_tensor->dims()[0]; + // const int n_input = weight_tensor->dims()[1]; + const int filter_h = weight_tensor->dims()[2]; + const int filter_w = weight_tensor->dims()[3]; + // auto filter_num = n_input * filter_h * filter_w ; + auto filter_num = weight_tensor->dims()[0]; + engine_->AddOpAttr(op_name, "filter_num", filter_num); + engine_->AddOpAttr>(op_name, "kernel_size", {filter_h, filter_w}); + auto strides = boost::get>(op_desc.GetAttr("strides")); + engine_->AddOpAttr>(op_name, "strides", strides); + auto paddings = boost::get>(op_desc.GetAttr("paddings")); + engine_->AddOpAttr>(op_name, "padding", paddings); + auto dilations = boost::get>(op_desc.GetAttr("dilations")); + engine_->AddOpAttr>(op_name, "dilation_rate", dilations); + const int groups = boost::get(op_desc.GetAttr("groups")); + engine_->AddOpAttr(op_name, "group", groups); + engine_->AddOpAttr(op_name, "axis", 1); + engine_->AddOpAttr(op_name, "bias_term", false); + + auto weight_shape = framework::vectorize2int(filter_t->dims()); + Shape anakin_shape(weight_shape); + auto *weight1 = + GraphGlobalMem::Global().template new_block(anakin_shape); + float *cpu_data = static_cast(weight1->h_tensor().mutable_data()); + std::copy_n(weight_tensor->data(), weight_tensor->numel(), cpu_data); + weight1->d_tensor().set_shape(anakin_shape); + weight1->d_tensor().copy_from(weight1->h_tensor()); + engine_->AddOpAttr(op_name, "weight_1", *weight1); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/registrar.cc b/paddle/fluid/inference/anakin/convert/conv2d.h similarity index 66% rename from paddle/fluid/inference/anakin/convert/registrar.cc rename to paddle/fluid/inference/anakin/convert/conv2d.h index 701ebdb2d43cf524330f946ac56d32dfa884f42a..75a30c10d481762fe5579ccb4d79feeba73dc98a 100644 --- a/paddle/fluid/inference/anakin/convert/registrar.cc +++ b/paddle/fluid/inference/anakin/convert/conv2d.h @@ -12,22 +12,23 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/inference/anakin/convert/registrar.h" +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" namespace paddle { namespace inference { namespace anakin { -std::shared_ptr OpRegister::Get(const std::string &name) { - auto it = registry_.find(name); - if (it == registry_.end()) return nullptr; - return it->second(); -} +class Conv2dOpConverter : public AnakinOpConverter { + public: + Conv2dOpConverter() = default; -OpRegister *OpRegister::instance() { - static OpRegister factory; - return &factory; -} + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~Conv2dOpConverter() {} +}; } // namespace anakin } // namespace inference diff --git a/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc b/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc new file mode 100644 index 0000000000000000000000000000000000000000..4d105430dd298076fa8aa4c1925329c3a0e356a1 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/conv2d_fusion.cc @@ -0,0 +1,113 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/conv2d_fusion.h" +#include +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::saber::NV; +using anakin::saber::Shape; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void Conv2dFusionOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL); + PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL); + PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1UL); + PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL); + + auto input_name = op_desc.Input("Input").front(); + auto output_name = op_desc.Output("Output").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front(); + engine_->AddOp(op_name, "Convolution", {input_name}, {output_name}); + + auto *filter_v = scope.FindVar(op_desc.Input("Filter").front()); + PADDLE_ENFORCE_NOT_NULL(filter_v); + auto *filter_t = filter_v->GetMutable(); + + auto *b_v = scope.FindVar(op_desc.Input("Bias").front()); + PADDLE_ENFORCE_NOT_NULL(b_v); + auto *b_t = b_v->GetMutable(); + + std::unique_ptr weight_tensor( + new framework::LoDTensor()); + weight_tensor->Resize(filter_t->dims()); + TensorCopySync((*filter_t), platform::CPUPlace(), weight_tensor.get()); + + PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL); + + // const int n_output = weight_tensor->dims()[0]; + // const int n_input = weight_tensor->dims()[1]; + const int filter_h = weight_tensor->dims()[2]; + const int filter_w = weight_tensor->dims()[3]; + // auto filter_num = n_input * filter_h * filter_w ; + auto filter_num = weight_tensor->dims()[0]; + engine_->AddOpAttr(op_name, "filter_num", filter_num); + engine_->AddOpAttr>(op_name, "kernel_size", {filter_h, filter_w}); + auto strides = boost::get>(op_desc.GetAttr("strides")); + engine_->AddOpAttr>(op_name, "strides", strides); + auto paddings = boost::get>(op_desc.GetAttr("paddings")); + engine_->AddOpAttr>(op_name, "padding", paddings); + auto dilations = boost::get>(op_desc.GetAttr("dilations")); + engine_->AddOpAttr>(op_name, "dilation_rate", dilations); + const int groups = boost::get(op_desc.GetAttr("groups")); + engine_->AddOpAttr(op_name, "group", groups); + engine_->AddOpAttr(op_name, "axis", 1); + engine_->AddOpAttr(op_name, "bias_term", true); + + auto weight_shape = framework::vectorize2int(filter_t->dims()); + Shape anakin_shape(weight_shape); + auto *weight1 = + GraphGlobalMem::Global().template new_block(anakin_shape); + float *cpu_data = static_cast(weight1->h_tensor().mutable_data()); + std::copy_n(weight_tensor->data(), weight_tensor->numel(), cpu_data); + weight1->d_tensor().set_shape(anakin_shape); + weight1->d_tensor().copy_from(weight1->h_tensor()); + engine_->AddOpAttr(op_name, "weight_1", *weight1); + + auto bias_shape = framework::vectorize2int(b_t->dims()); + framework::LoDTensor bias_tensor; + bias_tensor.Resize(b_t->dims()); + TensorCopySync((*b_t), platform::CPUPlace(), &bias_tensor); + auto *bias_data = bias_tensor.data(); + bias_shape.insert(bias_shape.begin(), 1); + bias_shape.insert(bias_shape.begin(), 1); + bias_shape.insert(bias_shape.begin(), 1); + // bias_shape.push_back(1); + // bias_shape.push_back(1); + Shape anakin_bias_shape(bias_shape); + + auto *weight2 = GraphGlobalMem::Global().template new_block( + anakin_bias_shape); + float *cpu_data2 = static_cast(weight2->h_tensor().mutable_data()); + std::copy_n(bias_data, bias_tensor.numel(), cpu_data2); + weight2->d_tensor().set_shape(anakin_bias_shape); + weight2->d_tensor().copy_from(weight2->h_tensor()); + engine_->AddOpAttr(op_name, "weight_2", *weight2); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/conv2d_fusion.h b/paddle/fluid/inference/anakin/convert/conv2d_fusion.h new file mode 100644 index 0000000000000000000000000000000000000000..07359b9cba05bf7c885eb38d64816bdb718a6aba --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/conv2d_fusion.h @@ -0,0 +1,35 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class Conv2dFusionOpConverter : public AnakinOpConverter { + public: + Conv2dFusionOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~Conv2dFusionOpConverter() {} +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/density_prior_box.cc b/paddle/fluid/inference/anakin/convert/density_prior_box.cc new file mode 100644 index 0000000000000000000000000000000000000000..a55c153f99a815c0e0092b69b8e181630aed16bf --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/density_prior_box.cc @@ -0,0 +1,90 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/density_prior_box.h" +#include +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::saber::NV; +using anakin::saber::Shape; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + auto input_name = op_desc.Input("Input").front(); + auto image_name = op_desc.Input("Image").front(); + auto output_name = op_desc.Output("Boxes").front(); + + auto op_name = op_desc.Type() + ":" + op_desc.Output("Boxes").front(); + + auto fixed_sizes = + boost::get>(op_desc.GetAttr("fixed_sizes")); + auto fixed_ratios = + boost::get>(op_desc.GetAttr("fixed_ratios")); + auto densities = boost::get>(op_desc.GetAttr("densities")); + std::vector dens; + for (auto& ele : densities) { + dens.push_back(static_cast(ele)); + } + + // lack flip + // auto clip = boost::get(op_desc.GetAttr("clip")); + auto variances = boost::get>(op_desc.GetAttr("variances")); + for (auto& ele : variances) { + LOG(INFO) << ele; + } + + // lack img_h, img_w + auto step_h = boost::get(op_desc.GetAttr("step_h")); + auto step_w = boost::get(op_desc.GetAttr("step_w")); + auto offset = boost::get(op_desc.GetAttr("offset")); + PTuple t_order; + t_order.push_back("MIN"); + t_order.push_back("COM"); + t_order.push_back("MAX"); + + std::vector temp_v = {}; + + engine_->AddOp(op_name, "PriorBox", {input_name, image_name}, {output_name}); + engine_->AddOpAttr>(op_name, "min_size", temp_v); + engine_->AddOpAttr>(op_name, "max_size", temp_v); + engine_->AddOpAttr>(op_name, "aspect_ratio", temp_v); + engine_->AddOpAttr>(op_name, "fixed_size", fixed_sizes); + engine_->AddOpAttr>(op_name, "fixed_ratio", fixed_ratios); + engine_->AddOpAttr>(op_name, "density", dens); + engine_->AddOpAttr(op_name, "is_flip", static_cast(false)); + engine_->AddOpAttr(op_name, "is_clip", static_cast(false)); + engine_->AddOpAttr>(op_name, "variance", variances); + engine_->AddOpAttr(op_name, "img_h", static_cast(0)); + engine_->AddOpAttr(op_name, "img_w", static_cast(0)); + engine_->AddOpAttr(op_name, "step_h", step_h); + engine_->AddOpAttr(op_name, "step_w", step_w); + engine_->AddOpAttr(op_name, "offset", offset); + engine_->AddOpAttr>(op_name, "order", t_order); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/density_prior_box.h b/paddle/fluid/inference/anakin/convert/density_prior_box.h new file mode 100644 index 0000000000000000000000000000000000000000..44265cbf2e968e8821bc1a9ae3225c9b7d405235 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/density_prior_box.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class DensityPriorBoxOpConverter : public AnakinOpConverter { + public: + DensityPriorBoxOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~DensityPriorBoxOpConverter() {} +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/detection_out.cc b/paddle/fluid/inference/anakin/convert/detection_out.cc new file mode 100644 index 0000000000000000000000000000000000000000..67636651017cfb18967cf8dc76d4f4a552fbd021 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/detection_out.cc @@ -0,0 +1,72 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/detection_out.h" +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::saber::NV; +using anakin::saber::Shape; + +namespace paddle { +namespace inference { +namespace anakin { + +void DetectionOutOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + auto target_name = op_desc.Input("TargetBox").front(); + auto prior_box_name = op_desc.Input("PriorBox").front(); + auto scores_name = op_desc.Input("Scores").front(); + auto output_name = op_desc.Output("Out").front(); + + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + + auto code_type = boost::get(op_desc.GetAttr("code_type")); + auto background_label = boost::get(op_desc.GetAttr("background_label")); + auto score_threshold = boost::get(op_desc.GetAttr("score_threshold")); + auto nms_top_k = boost::get(op_desc.GetAttr("nms_top_k")); + auto nms_threshold = boost::get(op_desc.GetAttr("nms_threshold")); + auto nms_eta = boost::get(op_desc.GetAttr("nms_eta")); + auto keep_top_k = boost::get(op_desc.GetAttr("keep_top_k")); + std::string anakin_code_type; + if (code_type == "decode_center_size") { + anakin_code_type = "CENTER_SIZE"; + } else if (code_type == "encode_center_size") { + PADDLE_THROW( + "Not support encode_center_size code_type in DetectionOut of anakin"); + } + + engine_->AddOp(op_name, "DetectionOutput", + {target_name, scores_name, prior_box_name}, {output_name}); + engine_->AddOpAttr(op_name, "share_location", true); + engine_->AddOpAttr(op_name, "variance_encode_in_target", false); + engine_->AddOpAttr(op_name, "class_num", static_cast(0)); + engine_->AddOpAttr(op_name, "background_id", background_label); + engine_->AddOpAttr(op_name, "keep_top_k", keep_top_k); + engine_->AddOpAttr(op_name, "code_type", anakin_code_type); + engine_->AddOpAttr(op_name, "conf_thresh", score_threshold); + engine_->AddOpAttr(op_name, "nms_top_k", nms_top_k); + engine_->AddOpAttr(op_name, "nms_thresh", nms_threshold); + engine_->AddOpAttr(op_name, "nms_eta", nms_eta); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(detection_out, DetectionOutOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/detection_out.h b/paddle/fluid/inference/anakin/convert/detection_out.h new file mode 100644 index 0000000000000000000000000000000000000000..5bf1c3ecbc89795d075301a2fd568312236bd874 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/detection_out.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class DetectionOutOpConverter : public AnakinOpConverter { + public: + DetectionOutOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~DetectionOutOpConverter() {} +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/dropout.cc b/paddle/fluid/inference/anakin/convert/dropout.cc new file mode 100644 index 0000000000000000000000000000000000000000..ed6d7f7561cb78666855146864b33254026926ef --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/dropout.cc @@ -0,0 +1,66 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/dropout.h" +#include +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void DropoutOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Mask").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto x_name = op_desc.Input("X").front(); + auto out_name = op_desc.Output("Out").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + + engine_->AddOp(op_name, "Scale", {x_name}, {out_name}); + + auto dropout_prob = boost::get(op_desc.GetAttr("dropout_prob")); + auto factor = 1 - dropout_prob; + Shape shape1(std::vector({1, 1, 1, 1})); + auto *weight1 = + GraphGlobalMem::Global().template new_block(shape1); + auto *factor_data = static_cast(weight1->h_tensor().mutable_data()); + float weight1_data[] = {factor}; + std::copy(std::begin(weight1_data), std::end(weight1_data), factor_data); + + engine_->AddOpAttr(op_name, "weight_1", *weight1); + engine_->AddOpAttr(op_name, "axis", 0); + engine_->AddOpAttr(op_name, "num_axes", 0); + engine_->AddOpAttr(op_name, "bias_term", false); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(dropout, DropoutOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/dropout.h b/paddle/fluid/inference/anakin/convert/dropout.h new file mode 100644 index 0000000000000000000000000000000000000000..2a0fb6e76ac8354d884f9d815a4df785248e6475 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/dropout.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class DropoutOpConverter : public AnakinOpConverter { + public: + DropoutOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~DropoutOpConverter() {} + + private: +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/elementwise.cc b/paddle/fluid/inference/anakin/convert/elementwise.cc new file mode 100644 index 0000000000000000000000000000000000000000..55b12390baf90a9365fd4d197b19a3c5cd675afd --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/elementwise.cc @@ -0,0 +1,88 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/elementwise.h" +#include +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void ElementwiseAddOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto x_name = op_desc.Input("X").front(); + auto y_name = op_desc.Input("Y").front(); + auto out_name = op_desc.Output("Out").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + + engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name}); + std::string elementwise_type = "Add"; + engine_->AddOpAttr(op_name, "type", elementwise_type); + std::vector coeff = {1.0, 1.0}; + engine_->AddOpAttr>(op_name, "coeff", coeff); +} + +void ElementwiseMulOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto x_name = op_desc.Input("X").front(); + auto y_name = op_desc.Input("Y").front(); + auto out_name = op_desc.Output("Out").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + + engine_->AddOp(op_name, "Scale", {x_name, y_name}, {out_name}); + // Fill a number to weight_1 as a placeholder. + Shape shape1(std::vector({1, 1, 1, 1})); + auto *weight1 = + GraphGlobalMem::Global().template new_block(shape1); + auto *placeholder_data = + static_cast(weight1->h_tensor().mutable_data()); + float weight1_data[] = {1}; + std::copy(std::begin(weight1_data), std::end(weight1_data), placeholder_data); + engine_->AddOpAttr(op_name, "weight_1", *weight1); + + auto axis = boost::get(op_desc.GetAttr("axis")); + engine_->AddOpAttr(op_name, "axis", axis); + engine_->AddOpAttr(op_name, "num_axes", 1); + engine_->AddOpAttr(op_name, "bias_term", false); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(elementwise_add, ElementwiseAddOpConverter); +REGISTER_ANAKIN_OP_CONVERTER(elementwise_mul, ElementwiseMulOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/elementwise.h b/paddle/fluid/inference/anakin/convert/elementwise.h new file mode 100644 index 0000000000000000000000000000000000000000..47525e41daafcbca0c7c86bad44066f18a3ac79c --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/elementwise.h @@ -0,0 +1,49 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class ElementwiseAddOpConverter : public AnakinOpConverter { + public: + ElementwiseAddOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~ElementwiseAddOpConverter() {} + + private: +}; + +class ElementwiseMulOpConverter : public AnakinOpConverter { + public: + ElementwiseMulOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~ElementwiseMulOpConverter() {} + + private: +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/fc.cc b/paddle/fluid/inference/anakin/convert/fc.cc index 33a5aff1de2851ad55c2df83cc48ba86f8ded754..2514eb1e093b4e05b7e6b2814cfd8185b3aede6c 100644 --- a/paddle/fluid/inference/anakin/convert/fc.cc +++ b/paddle/fluid/inference/anakin/convert/fc.cc @@ -14,60 +14,108 @@ #include "paddle/fluid/inference/anakin/convert/fc.h" #include +#include +#include using anakin::graph::GraphGlobalMem; using anakin::AK_FLOAT; -using anakin::Precision; using anakin::saber::NV; -using anakin::saber::X86; using anakin::saber::Shape; -using anakin::PBlock; -using anakin::PTuple; namespace paddle { namespace inference { namespace anakin { -void FcOpConverter::operator()(const framework::proto::OpDesc &op, - const framework::Scope &scope, bool test_mode) { +void FcBaseOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { framework::OpDesc op_desc(op, nullptr); - PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); - PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); - PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + auto input_names = op_desc.InputNames(); + bool with_bias = input_names.size() == 3; + + std::string w_name = "Y"; + std::string i_name = "X"; + if (with_bias) { + w_name = "W"; + i_name = "Input"; + } - auto x_name = op_desc.Input("X").front(); auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); - auto *y_v = scope.FindVar(op_desc.Input("Y").front()); + + // get weights + auto *y_v = scope.FindVar(op_desc.Input(w_name).front()); PADDLE_ENFORCE_NOT_NULL(y_v); auto *y_t = y_v->GetMutable(); - auto input_name = op_desc.Input("X").front(); + auto input_name = op_desc.Input(i_name).front(); auto output_name = op_desc.Output("Out").front(); - auto weight_shape = framework::vectorize2int(y_t->dims()); engine_->AddOp(op_name, "Dense", {input_name}, {output_name}); - engine_->AddOpAttr(op_name, "bias_term", false); + engine_->AddOpAttr(op_name, "bias_term", with_bias); engine_->AddOpAttr(op_name, "axis", 1); + + auto weight_shape = framework::vectorize2int(y_t->dims()); int out_dim = weight_shape[1]; engine_->AddOpAttr(op_name, "out_dim", out_dim); + const int w_m = weight_shape[0]; + const int w_k = weight_shape[1]; - weight_shape.push_back(1); - weight_shape.push_back(1); + if (weight_shape.size() < 4UL) { + weight_shape.insert(weight_shape.begin(), 4UL - weight_shape.size(), 1); + } Shape anakin_shape(weight_shape); framework::LoDTensor weight_tensor; weight_tensor.Resize(y_t->dims()); TensorCopySync((*y_t), platform::CPUPlace(), &weight_tensor); + auto *weight_data = weight_tensor.data(); + PADDLE_ENFORCE(w_m * w_k == weight_tensor.numel()); + std::vector trans_weight_data(weight_tensor.numel()); + for (int i = 0; i < w_m; i++) { + for (int j = 0; j < w_k; j++) { + trans_weight_data[i + j * w_m] = weight_data[i * w_k + j]; + } + } auto *weight1 = GraphGlobalMem::Global().template new_block(anakin_shape); float *cpu_data = static_cast(weight1->h_tensor().mutable_data()); - std::copy_n(weight_tensor.data(), weight_tensor.numel(), cpu_data); + std::copy_n(trans_weight_data.data(), weight_tensor.numel(), cpu_data); weight1->d_tensor().set_shape(anakin_shape); weight1->d_tensor().copy_from(weight1->h_tensor()); engine_->AddOpAttr(op_name, "weight_1", *weight1); + + // get bias + if (with_bias) { + auto *b_v = scope.FindVar(op_desc.Input("Bias").front()); + PADDLE_ENFORCE_NOT_NULL(b_v); + auto *b_t = b_v->GetMutable(); + + auto bias_shape = framework::vectorize2int(b_t->dims()); + framework::LoDTensor bias_tensor; + bias_tensor.Resize(b_t->dims()); + TensorCopySync((*b_t), platform::CPUPlace(), &bias_tensor); + auto *bias_data = bias_tensor.data(); + bias_shape.insert(bias_shape.begin(), 1); + bias_shape.insert(bias_shape.begin(), 1); + bias_shape.insert(bias_shape.begin(), 1); + // bias_shape.push_back(1); + // bias_shape.push_back(1); + Shape anakin_bias_shape(bias_shape); + + auto *weight2 = GraphGlobalMem::Global().template new_block( + anakin_bias_shape); + float *cpu_data2 = static_cast(weight2->h_tensor().mutable_data()); + std::copy_n(bias_data, bias_tensor.numel(), cpu_data2); + weight2->d_tensor().set_shape(anakin_bias_shape); + weight2->d_tensor().copy_from(weight2->h_tensor()); + engine_->AddOpAttr(op_name, "weight_2", *weight2); + } } } // namespace anakin } // namespace inference } // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(mul, MulOpConverter); +REGISTER_ANAKIN_OP_CONVERTER(fc, FcOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/fc.h b/paddle/fluid/inference/anakin/convert/fc.h index b670486f12b36043a01ceb002da8756901ed01ce..060c649b19ef335a9e926eb205ec691a2a188fe1 100644 --- a/paddle/fluid/inference/anakin/convert/fc.h +++ b/paddle/fluid/inference/anakin/convert/fc.h @@ -20,19 +20,28 @@ namespace paddle { namespace inference { namespace anakin { -class FcOpConverter : public AnakinOpConverter { +class FcBaseOpConverter : public AnakinOpConverter { public: - FcOpConverter() = default; + FcBaseOpConverter() = default; virtual void operator()(const framework::proto::OpDesc &op, const framework::Scope &scope, bool test_mode) override; - virtual ~FcOpConverter() {} + virtual ~FcBaseOpConverter() {} +}; - private: +// with bias +class FcOpConverter : public FcBaseOpConverter { + public: + FcOpConverter() = default; +}; + +// without bias +class MulOpConverter : public FcBaseOpConverter { + public: + MulOpConverter() = default; }; -static Registrar register_fc_op_converter("fc"); } // namespace anakin } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/flatten.cc b/paddle/fluid/inference/anakin/convert/flatten.cc new file mode 100644 index 0000000000000000000000000000000000000000..c6c372bbef87de7f38c1f66a21c170cabac8c0ed --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/flatten.cc @@ -0,0 +1,51 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/flatten.h" +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::saber::NV; +using anakin::saber::Shape; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void FlattenOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL); + + auto input = op_desc.Input("X").front(); + auto output = op_desc.Output("Out").front(); + int axis = boost::get(op_desc.GetAttr("axis")); + PADDLE_ENFORCE(axis == 1, + "the anakin flatten op converter now only support aixs == 1."); + + std::vector out_dims = {0, -1, 1, 1}; + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + engine_->AddOp(op_name, "Reshape", {input}, {output}); + engine_->AddOpAttr>(op_name, "dims", out_dims); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(flatten, FlattenOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/flatten.h b/paddle/fluid/inference/anakin/convert/flatten.h new file mode 100644 index 0000000000000000000000000000000000000000..1ace76b16381980a9eaec12806e0bc94d7b1fb85 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/flatten.h @@ -0,0 +1,35 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class FlattenOpConverter : public AnakinOpConverter { + public: + FlattenOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~FlattenOpConverter() {} +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/im2sequence.cc b/paddle/fluid/inference/anakin/convert/im2sequence.cc new file mode 100644 index 0000000000000000000000000000000000000000..568d7e4746f11b13ce8ea9e5a47a1b43d1c12693 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/im2sequence.cc @@ -0,0 +1,62 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/im2sequence.h" +#include +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void Im2SequenceConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 0); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto x_name = op_desc.Input("X").front(); + auto out_name = op_desc.Output("Out").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + + engine_->AddOp(op_name, "Im2Sequence", {x_name}, {out_name}); + + std::vector dilations = {1, 1}; + auto paddings = boost::get>(op_desc.GetAttr("paddings")); + auto strides = boost::get>(op_desc.GetAttr("strides")); + auto kernels = boost::get>(op_desc.GetAttr("kernels")); + + engine_->AddOpAttr>(op_name, "paddings", paddings); + engine_->AddOpAttr>(op_name, "strides", strides); + engine_->AddOpAttr>(op_name, "window_size", kernels); + engine_->AddOpAttr>(op_name, "dilations", dilations); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(im2sequence, Im2SequenceConverter); diff --git a/paddle/fluid/inference/anakin/convert/im2sequence.h b/paddle/fluid/inference/anakin/convert/im2sequence.h new file mode 100644 index 0000000000000000000000000000000000000000..3003eac2c6f416663c3e7c4c3e297b6347edfb47 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/im2sequence.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class Im2SequenceConverter : public AnakinOpConverter { + public: + Im2SequenceConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~Im2SequenceConverter() {} + + private: +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/op_converter.h b/paddle/fluid/inference/anakin/convert/op_converter.h index b9a221079dcec78fc86ebed7dfac0c59ec0f8540..4603681e1e8a3c2841a62cc88b49a84950910e73 100644 --- a/paddle/fluid/inference/anakin/convert/op_converter.h +++ b/paddle/fluid/inference/anakin/convert/op_converter.h @@ -14,15 +14,16 @@ #pragma once +#include #include #include #include #include +#include #include "framework/core/types.h" #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/inference/anakin/convert/registrar.h" #include "paddle/fluid/inference/anakin/engine.h" #include "paddle/fluid/inference/utils/singleton.h" #include "saber/saber_types.h" @@ -46,19 +47,14 @@ class AnakinOpConverter { bool test_mode = false) { framework::OpDesc op_desc(op, nullptr); std::string op_type = op_desc.Type(); - std::shared_ptr it{nullptr}; - - if (op_type == "mul") { - PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL); - std::string Y = op_desc.Input("Y")[0]; - std::cout << Y << parameters.count(Y) << std::endl; - if (parameters.count(Y)) { - it = OpRegister::instance()->Get("fc"); - } - } + AnakinOpConverter *it = nullptr; + + if (op_type == "reshape2") op_type = "reshape"; + if (op_type == "transpose2") op_type = "transpose"; + if (op_type == "flatten2") op_type = "flatten"; if (!it) { - it = OpRegister::instance()->Get(op_type); + it = Registry::Global().Lookup(op_type); } PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", op_type); it->SetEngine(engine); @@ -74,6 +70,63 @@ class AnakinOpConverter { ConvertOp(op, parameters, scope, engine); } } + + // The scope here should be inited with the parameter vars. + void ConvertBlockToAnakinEngine( + framework::BlockDesc *block_desc, framework::Scope *scope, + const std::vector &inputs, + const std::unordered_set ¶meters, + const std::vector &outputs, AnakinNvEngine *engine) { + framework::proto::BlockDesc *block_proto = block_desc->Proto(); + ConvertBlock(*block_proto, parameters, *scope, engine); + + engine->Freeze(); + // if the max_batch size + int max_batch_size = engine->GetMaxBatchSize(); + PADDLE_ENFORCE(max_batch_size > 0, + "the max_batch_size setted from config->EnableAnakinEngine " + "must largger than 0"); + // If the user does not specify this variable, we use the input shape from + // the block_desc. + auto max_input_shape = engine->GetMaxInputShape(); + std::map> temp_max_input_shape; + + for (auto &input : inputs) { + if (parameters.count(input)) continue; + std::vector input_shape; + input_shape.resize(4); + input_shape[0] = max_batch_size; + if (max_input_shape.count(input)) { + PADDLE_ENFORCE(max_input_shape[input].size() == 4, + "the dimensions of max_input_shape setted from " + "config->EnableAnakinEngine must be 4"); + for (int i = 1; i < 4; i++) { + input_shape[i] = max_input_shape[input][i]; + } + } else { + auto *var = block_desc->FindVar(input); + PADDLE_ENFORCE(var, "no variable called %s", input); + + auto var_shape = var->GetShape(); + std::cout << "input :" << input << std::endl; + PADDLE_ENFORCE(var_shape.size() == 4); + + for (size_t i = 1; i < var_shape.size(); i++) { + input_shape[i] = var_shape[i]; + } + } + temp_max_input_shape[input] = input_shape; + engine->SetInputShape(input, input_shape); + engine->Graph()->RegistVar(input); // For share from data. + } + engine->SetMaxInputShape(temp_max_input_shape); + engine->Optimize(); + + // For anakin share with fluid tensor. + engine->AllocTmpMem(); + engine->InitGraph(); + } + void SetEngine(AnakinNvEngine *engine) { engine_ = engine; } virtual ~AnakinOpConverter() {} @@ -91,22 +144,23 @@ class AnakinOpConverter { } // namespace inference } // namespace paddle -#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \ - struct anakin_##op_type__##_converter \ - : public ::paddle::framework::Registrar { \ - anakin_##op_type__##_converter() { \ - ::paddle::inference:: \ - Registry::Register< \ - ::paddle::inference::anakin::Converter__>(#op_type__); \ - } \ - }; \ - anakin_##op_type__##_converter anakin_##op_type__##_converter__; \ - int TouchConverterRegister_anakin_##op_type__() { \ - anakin_##op_type__##_converter__.Touch(); \ - return 0; \ +#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \ + struct anakin_##op_type__##_converter \ + : public ::paddle::framework::Registrar { \ + anakin_##op_type__##_converter() { \ + LOG(INFO) << "register convert " << #op_type__; \ + ::paddle::inference::Registry< \ + ::paddle::inference::anakin::AnakinOpConverter>::Global() \ + .Register<::paddle::inference::anakin::Converter__>(#op_type__); \ + } \ + }; \ + anakin_##op_type__##_converter anakin_##op_type__##_converter__; \ + int TouchConverterRegister_anakin_##op_type__() { \ + anakin_##op_type__##_converter__.Touch(); \ + return 0; \ } -#define USE_ANAKIN_CONVERTER(op_type__) \ - extern int TouchConverterRegister_anakin_##op_type__(); \ - static int use_op_converter_anakin_##op_type__ __attribute__((unused)) = \ +#define USE_ANAKIN_CONVERTER(op_type__) \ + extern int TouchConverterRegister_anakin_##op_type__(); \ + int use_op_converter_anakin_##op_type__ __attribute__((unused)) = \ TouchConverterRegister_anakin_##op_type__(); diff --git a/paddle/fluid/inference/anakin/convert/pool2d.cc b/paddle/fluid/inference/anakin/convert/pool2d.cc new file mode 100644 index 0000000000000000000000000000000000000000..9b01d56a126b2ebc194f5b5bb5b2f52c298a316e --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/pool2d.cc @@ -0,0 +1,80 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/pool2d.h" +#include +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void Pool2dOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto x_name = op_desc.Input("X").front(); + auto y_name = op_desc.Output("Out").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + + bool global_pooling = boost::get(op_desc.GetAttr("global_pooling")); + std::string pool_type = + boost::get(op_desc.GetAttr("pooling_type")); + std::vector ksize = + boost::get>(op_desc.GetAttr("ksize")); + std::vector strides = + boost::get>(op_desc.GetAttr("strides")); + std::vector paddings = + boost::get>(op_desc.GetAttr("paddings")); + bool ceil_mode = boost::get(op_desc.GetAttr("ceil_mode")); + std::string anakin_pool_type; + if (pool_type == "max") { + anakin_pool_type = "MAX"; + } else if (pool_type == "avg") { + if (paddings[0] || paddings[1]) { + anakin_pool_type = "AVGEXC"; + } else { + anakin_pool_type = "AVG"; + } + } else { + PADDLE_THROW("TensorRT unsupported pooling type!"); + } + + engine_->AddOp(op_name, "Pooling", {x_name}, {y_name}); + engine_->AddOpAttr>(op_name, "pool_size", ksize); + engine_->AddOpAttr>(op_name, "strides", strides); + engine_->AddOpAttr>(op_name, "padding", paddings); + engine_->AddOpAttr(op_name, "method", anakin_pool_type); + engine_->AddOpAttr(op_name, "global_pooling", global_pooling); + engine_->AddOpAttr(op_name, "cmp_out_shape_floor_as_conv", !ceil_mode); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(pool2d, Pool2dOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/pool2d.h b/paddle/fluid/inference/anakin/convert/pool2d.h new file mode 100644 index 0000000000000000000000000000000000000000..1931a03c7ac236b4e57236cd1eb2947110f279a8 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/pool2d.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class Pool2dOpConverter : public AnakinOpConverter { + public: + Pool2dOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~Pool2dOpConverter() {} + + private: +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/registrar.h b/paddle/fluid/inference/anakin/convert/registrar.h deleted file mode 100644 index afce66ca084143ae203af9a60089aa2f5d18a725..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/anakin/convert/registrar.h +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include -#include - -namespace paddle { -namespace inference { -namespace anakin { - -class AnakinOpConverter; - -class OpRegister { - public: - OpRegister() = default; - std::shared_ptr Get(const std::string &name); - static OpRegister *instance(); - void OpRegisterFn(const std::string &name, - std::function()> fn) { - registry_[name] = fn; - } - - private: - using RegisterFnType = std::function()>; - std::map()>> - registry_; -}; - -template -class Registrar { - public: - Registrar(const std::string &name, Args... args) { - std::shared_ptr converter = - std::make_shared(std::move(args)...); - OpRegister::instance()->OpRegisterFn(name, - [converter]() { return converter; }); - } -}; - -} // namespace anakin -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/relu.cc b/paddle/fluid/inference/anakin/convert/relu.cc new file mode 100644 index 0000000000000000000000000000000000000000..2ce96db1804a3d6d6d1afac79e4e1fc55ed4c35d --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/relu.cc @@ -0,0 +1,47 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/relu.h" +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::saber::NV; +using anakin::saber::Shape; + +namespace paddle { +namespace inference { +namespace anakin { + +void ReluOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + auto input_name = op_desc.Input("X").front(); + auto output_name = op_desc.Output("Out").front(); + + engine_->AddOp(op_name, "ReLU", {input_name}, {output_name}); + engine_->AddOpAttr(op_name, "alpha", 0); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(relu, ReluOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/relu.h b/paddle/fluid/inference/anakin/convert/relu.h new file mode 100644 index 0000000000000000000000000000000000000000..54c4c2316eb32ef70696a2477211008e04892552 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/relu.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class ReluOpConverter : public AnakinOpConverter { + public: + ReluOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~ReluOpConverter() {} +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/reshape.cc b/paddle/fluid/inference/anakin/convert/reshape.cc new file mode 100644 index 0000000000000000000000000000000000000000..eee36d2f37ea79c841ac8bf60c6e533069d06240 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/reshape.cc @@ -0,0 +1,52 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/reshape.h" +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::saber::NV; +using anakin::saber::Shape; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void ReshapeOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL); + + auto input = op_desc.Input("X").front(); + auto output = op_desc.Output("Out").front(); + + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + engine_->AddOp(op_name, "Reshape", {input}, {output}); + + auto shape = boost::get>(op_desc.GetAttr("shape")); + if (shape.size() < 4) { + shape.insert(shape.end(), 4 - shape.size(), 1); + } + engine_->AddOpAttr>(op_name, "dims", shape); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(reshape, ReshapeOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/reshape.h b/paddle/fluid/inference/anakin/convert/reshape.h new file mode 100644 index 0000000000000000000000000000000000000000..970e8ce5572572bd18c34eeffa902fa2495c1cce --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/reshape.h @@ -0,0 +1,35 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class ReshapeOpConverter : public AnakinOpConverter { + public: + ReshapeOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~ReshapeOpConverter() {} +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/scale.cc b/paddle/fluid/inference/anakin/convert/scale.cc new file mode 100644 index 0000000000000000000000000000000000000000..6f3aa8c5d1111dc2829e241c9331eeb521003c03 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/scale.cc @@ -0,0 +1,56 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/scale.h" +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::saber::NV; +using anakin::saber::Shape; + +namespace paddle { +namespace inference { +namespace anakin { + +void ScaleOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + + auto input_name = op_desc.Input("X").front(); + auto output_name = op_desc.Output("Out").front(); + float scale = boost::get(op_desc.GetAttr("scale")); + float bias = boost::get(op_desc.GetAttr("bias")); + float bias_after_scale = + boost::get(op_desc.GetAttr("bias_after_scale")); + PADDLE_ENFORCE(bias_after_scale, + "The anakin scale layer only support bias after scale now."); + + engine_->AddOp(op_name, "Power", {input_name}, {output_name}); + engine_->AddOpAttr(op_name, "shift", bias); + engine_->AddOpAttr(op_name, "scale", scale); + engine_->AddOpAttr(op_name, "power", static_cast(1.0)); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(scale, ScaleOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/scale.h b/paddle/fluid/inference/anakin/convert/scale.h new file mode 100644 index 0000000000000000000000000000000000000000..b858e3c512494f80c7c3818a570e43d90d65251b --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/scale.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class ScaleOpConverter : public AnakinOpConverter { + public: + ScaleOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~ScaleOpConverter() {} +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/softmax.cc b/paddle/fluid/inference/anakin/convert/softmax.cc new file mode 100644 index 0000000000000000000000000000000000000000..d5cd8908ebf623f0334a3b4df2b19147c63f77a3 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/softmax.cc @@ -0,0 +1,43 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/softmax.h" + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::saber::NV; +using anakin::saber::Shape; + +namespace paddle { +namespace inference { +namespace anakin { + +void SoftMaxOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL); + + auto input = op_desc.Input("X").front(); + auto output = op_desc.Output("Out").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + engine_->AddOp(op_name, "Softmax", {input}, {output}); + engine_->AddOpAttr(op_name, "axis", 2); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(softmax, SoftMaxOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/softmax.h b/paddle/fluid/inference/anakin/convert/softmax.h new file mode 100644 index 0000000000000000000000000000000000000000..0508da0c6fecaf29b7376005904235dadf04ea28 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/softmax.h @@ -0,0 +1,35 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class SoftMaxOpConverter : public AnakinOpConverter { + public: + SoftMaxOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~SoftMaxOpConverter() {} +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/split.cc b/paddle/fluid/inference/anakin/convert/split.cc new file mode 100644 index 0000000000000000000000000000000000000000..b8464a766d21e93426eb4a00b8caab2af5470055 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/split.cc @@ -0,0 +1,63 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/split.h" +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void SplitOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + auto input_name = op_desc.Input("X").front(); + auto y_names = op_desc.Output("Out"); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + int axis = boost::get(op_desc.GetAttr("axis")); + + std::vector output_lengths = + boost::get>(op_desc.GetAttr("sections")); + + int split_num = output_lengths.size(); + PADDLE_ENFORCE(split_num > 1, + "anakin split op converter: the split num should > 1"); + int num_sum = 0; + std::vector slice_point; + for (int i = 0; i < split_num - 1; i++) { + num_sum += output_lengths[i]; + slice_point.push_back(num_sum); + } + engine_->AddOp(op_name, "Slice", {input_name}, y_names); + engine_->AddOpAttr(op_name, "axis", axis); + engine_->AddOpAttr>(op_name, "slice_point", slice_point); + // slice_dim is useless in anakin + engine_->AddOpAttr(op_name, "slice_dim", 4); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle +REGISTER_ANAKIN_OP_CONVERTER(split, SplitOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/split.h b/paddle/fluid/inference/anakin/convert/split.h new file mode 100644 index 0000000000000000000000000000000000000000..a4c6a14e62168ffaf5ff67b5cf953d477ff9e34d --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/split.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class SplitOpConverter : public AnakinOpConverter { + public: + SplitOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~SplitOpConverter() {} + + private: +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/sum.cc b/paddle/fluid/inference/anakin/convert/sum.cc new file mode 100644 index 0000000000000000000000000000000000000000..df9104cf4631d86e0cbd87cb0e93a96d984953f5 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/sum.cc @@ -0,0 +1,54 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/sum.h" +#include +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void SumOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 2); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto input_names = op_desc.Input("X"); + auto out_name = op_desc.Output("Out").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + + std::vector coeff = {1, 1}; + std::string elementwise_type = "Add"; + engine_->AddOp(op_name, "Eltwise", input_names, {out_name}); + engine_->AddOpAttr>(op_name, "coeff", coeff); + engine_->AddOpAttr(op_name, "type", elementwise_type); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(sum, SumOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/sum.h b/paddle/fluid/inference/anakin/convert/sum.h new file mode 100644 index 0000000000000000000000000000000000000000..ddecc4b3bcb84f83af95e77399847f191c785563 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/sum.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class SumOpConverter : public AnakinOpConverter { + public: + SumOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~SumOpConverter() {} + + private: +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/test_activation_op.cc b/paddle/fluid/inference/anakin/convert/test_activation_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..8bedd4a749a645829658291310347eeed1c0ea49 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_activation_op.cc @@ -0,0 +1,53 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/activation.h" +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +static void test_activation_op(const std::string &op_type) { + auto *converter = Registry::Global().Lookup(op_type); + PADDLE_ENFORCE(converter != nullptr); + std::unordered_set parameters; + framework::Scope scope; + AnakinConvertValidation validator(parameters, &scope); + validator.DeclInputVar("act-X", {10, 6, 1, 1}); + validator.DeclOutputVar("act-Out", {10, 6, 1, 1}); + framework::OpDesc desc; + desc.SetType(op_type); + desc.SetInput("X", {"act-X"}); + desc.SetOutput("Out", {"act-Out"}); + + LOG(INFO) << "set OP"; + validator.SetOp(*desc.Proto()); + LOG(INFO) << "execute"; + + validator.Execute(5); +} + +TEST(sigm_op, test) { test_activation_op("sigmoid"); } +TEST(tanh_op, test) { test_activation_op("tanh"); } +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(sigmoid); +USE_OP(tanh); +USE_ANAKIN_CONVERTER(sigmoid); +USE_ANAKIN_CONVERTER(tanh); diff --git a/paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc b/paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..2832e1c8d167c646c9049beebc57a82fe416e62c --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc @@ -0,0 +1,71 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +TEST(batch_norm_op, test) { + std::unordered_set parameters( + {"batch_norm_scale", "batch_norm_bias", "batch_norm_mean", + "batch_norm_variance"}); + framework::Scope scope; + AnakinConvertValidation validator(parameters, &scope); + std::vector param_shape{2}; + + validator.DeclInputVar("batch_norm_X", {1, 2, 5, 5}); + validator.DeclParamVar("batch_norm_scale", param_shape); + validator.DeclParamVar("batch_norm_bias", param_shape); + validator.DeclParamVar("batch_norm_mean", param_shape); + validator.DeclParamVar("batch_norm_variance", param_shape); + validator.DeclOutputVar("batch_norm_Y", {1, 2, 5, 5}); + validator.DeclOutputVar("batch_norm_save_mean", param_shape); + validator.DeclOutputVar("batch_norm_save_variance", param_shape); + + // Prepare Op description + framework::OpDesc desc; + + desc.SetType("batch_norm"); + desc.SetInput("X", {"batch_norm_X"}); + desc.SetInput("Scale", {"batch_norm_scale"}); + desc.SetInput("Bias", {"batch_norm_bias"}); + desc.SetInput("Mean", {"batch_norm_mean"}); + desc.SetInput("Variance", {"batch_norm_variance"}); + desc.SetOutput("Y", {"batch_norm_Y"}); + desc.SetOutput("MeanOut", {"batch_norm_mean"}); + desc.SetOutput("VarianceOut", {"batch_norm_variance"}); + desc.SetOutput("SavedMean", {"batch_norm_save_mean"}); + desc.SetOutput("SavedVariance", {"batch_norm_save_variance"}); + + float eps = 1e-5f; + bool is_test = true; + desc.SetAttr("epsilon", eps); + desc.SetAttr("is_test", is_test); + + validator.SetOp(*desc.Proto()); + + std::unordered_set neglected_output = { + "batch_norm_save_mean", "batch_norm_save_variance", "batch_norm_mean", + "batch_norm_variance"}; + validator.Execute(1, neglected_output); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle +USE_OP(batch_norm); +USE_ANAKIN_CONVERTER(batch_norm); diff --git a/paddle/fluid/inference/anakin/convert/test_concat_op.cc b/paddle/fluid/inference/anakin/convert/test_concat_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..ecf44def5a2429360f0bcb92f00a0423e1d491cd --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_concat_op.cc @@ -0,0 +1,74 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/concat.h" +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +TEST(concat_op, test) { + std::unordered_set parameters({""}); + framework::Scope scope; + AnakinConvertValidation validator(parameters, &scope); + validator.DeclInputVar("concat_x1", {1, 2, 1, 1}); + validator.DeclInputVar("concat_x2", {1, 3, 1, 1}); + validator.DeclInputVar("concat_x3", {1, 1, 1, 1}); + validator.DeclOutputVar("concat_out", {1, 6, 1, 1}); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("concat"); + desc.SetInput("X", {"concat_x1", "concat_x2", "concat_x3"}); + desc.SetOutput("Out", {"concat_out"}); + + int axis = 1; + desc.SetAttr("axis", axis); + + validator.SetOp(*desc.Proto()); + + validator.Execute(1); +} + +TEST(concat_op, test2) { + std::unordered_set parameters({""}); + framework::Scope scope; + AnakinConvertValidation validator(parameters, &scope); + validator.DeclInputVar("concat_x1", {1, 4}); + validator.DeclInputVar("concat_x2", {3, 4}); + validator.DeclInputVar("concat_x3", {2, 4}); + validator.DeclOutputVar("concat_out", {6, 4}); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("concat"); + desc.SetInput("X", {"concat_x1", "concat_x2", "concat_x3"}); + desc.SetOutput("Out", {"concat_out"}); + + int axis = 0; + desc.SetAttr("axis", axis); + + validator.SetOp(*desc.Proto()); + + validator.Execute(1); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle +USE_OP(concat); +USE_ANAKIN_CONVERTER(concat); diff --git a/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc b/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..6d93e50bc96b08b6ef7dd7c9d836038e335daae3 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_conv2d_op.cc @@ -0,0 +1,62 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/conv2d.h" +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +TEST(conv2d_op, test) { + auto* conv2d_converter = + Registry::Global().Lookup("conv2d"); + ASSERT_TRUE(conv2d_converter != nullptr); + std::unordered_set parameters({"conv2d-Y"}); + framework::Scope scope; + AnakinConvertValidation validator(parameters, &scope); + validator.DeclInputVar("conv2d-X", {1, 3, 3, 3}); + validator.DeclParamVar("conv2d-Y", {4, 3, 1, 1}); + validator.DeclOutputVar("conv2d-Out", {1, 4, 3, 3}); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("conv2d"); + desc.SetInput("Input", {"conv2d-X"}); + desc.SetInput("Filter", {"conv2d-Y"}); + desc.SetOutput("Output", {"conv2d-Out"}); + + const std::vector strides({1, 1}); + const std::vector paddings({0, 0}); + const std::vector dilations({1, 1}); + const int groups = 1; + + desc.SetAttr("strides", strides); + desc.SetAttr("paddings", paddings); + desc.SetAttr("dilations", dilations); + desc.SetAttr("groups", groups); + + validator.SetOp(*desc.Proto()); + + validator.Execute(3); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(conv2d); +USE_ANAKIN_CONVERTER(conv2d); diff --git a/paddle/fluid/inference/anakin/convert/test_dropout_op.cc b/paddle/fluid/inference/anakin/convert/test_dropout_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..b2de5ae0a6e58eb25a4588571686a25500fe546c --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_dropout_op.cc @@ -0,0 +1,53 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/dropout.h" +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +TEST(dropout_op, native) { + std::unordered_set parameters; + framework::Scope scope; + AnakinConvertValidation validator(parameters, &scope); + validator.DeclInputVar("x", {1, 1, 2, 2}); + validator.DeclOutputVar("out", {1, 1, 2, 2}); + validator.DeclOutputVar("mask", {1, 1, 2, 2}); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("dropout"); + desc.SetInput("X", {"x"}); + desc.SetOutput("Out", {"out"}); + desc.SetOutput("Mask", {"mask"}); + + float dropout_prob = 0.5; + desc.SetAttr("dropout_prob", dropout_prob); + desc.SetAttr("is_test", true); + + validator.SetOp(*desc.Proto()); + std::unordered_set neglected_output = {"mask"}; + validator.Execute(1, neglected_output); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(dropout); +USE_ANAKIN_CONVERTER(dropout); diff --git a/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc b/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..3a437f5fdb565609667b7a862c9b2bb13cdbeded --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_elementwise_op.cc @@ -0,0 +1,56 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/elementwise.h" +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +static void test_elementwise_op(const std::string &op_type) { + std::unordered_set parameters; + framework::Scope scope; + AnakinConvertValidation validator(parameters, &scope); + validator.DeclInputVar("x", {1, 1, 2, 2}); + validator.DeclInputVar("y", {1, 1, 2, 2}); + validator.DeclOutputVar("out", {1, 1, 2, 2}); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType(op_type); + desc.SetInput("X", {"x"}); + desc.SetInput("Y", {"y"}); + desc.SetOutput("Out", {"out"}); + + int axis = -1; + desc.SetAttr("axis", axis); + + validator.SetOp(*desc.Proto()); + validator.Execute(1); +} + +TEST(elementwise_op, native_add) { test_elementwise_op("elementwise_add"); } +TEST(elementwise_op, native_mul) { test_elementwise_op("elementwise_mul"); } + +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(elementwise_add); +USE_ANAKIN_CONVERTER(elementwise_add); +USE_OP(elementwise_mul); +USE_ANAKIN_CONVERTER(elementwise_mul); diff --git a/paddle/fluid/inference/anakin/convert/test_fc_op.cc b/paddle/fluid/inference/anakin/convert/test_fc_op.cc index 7b8ceefe28873f0ffb9cedbb04b832ba029b7de4..ee6d1dc291fe3733ff2e9f66dd453120fa266a55 100644 --- a/paddle/fluid/inference/anakin/convert/test_fc_op.cc +++ b/paddle/fluid/inference/anakin/convert/test_fc_op.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include "paddle/fluid/inference/anakin/convert/fc.h" #include "paddle/fluid/inference/anakin/convert/op_converter.h" #include "paddle/fluid/inference/anakin/convert/ut_helper.h" @@ -22,17 +21,15 @@ namespace inference { namespace anakin { TEST(fc_op, test) { - auto fc_converter = OpRegister::instance()->Get("fc"); - ASSERT_TRUE(fc_converter != nullptr); - // Registrar register_fc("fc"); - // auto fc = std::make_shared(); + auto* fc_converter = Registry::Global().Lookup("fc"); + ASSERT_TRUE(fc_converter); std::unordered_set parameters({"mul_y"}); framework::Scope scope; - AnakinConvertValidation validator(parameters, scope); - validator.DeclInputVar("mul_x", {1, 1, 1, 1}); - validator.DeclParamVar("mul_y", {1, 2}); - validator.DeclOutputVar("mul_out", {1, 1, 1, 2}); + AnakinConvertValidation validator(parameters, &scope); + validator.DeclInputVar("mul_x", {1, 1, 2, 2}); + validator.DeclParamVar("mul_y", {4, 2}); + validator.DeclOutputVar("mul_out", {1, 2}); // Prepare Op description framework::OpDesc desc; @@ -40,8 +37,6 @@ TEST(fc_op, test) { desc.SetInput("X", {"mul_x"}); desc.SetInput("Y", {"mul_y"}); desc.SetOutput("Out", {"mul_out"}); - int num_flatten_dims = 3; - desc.SetAttr("x_num_col_dims", num_flatten_dims); validator.SetOp(*desc.Proto()); validator.Execute(10); @@ -52,3 +47,4 @@ TEST(fc_op, test) { } // namespace paddle USE_OP(mul); +USE_ANAKIN_CONVERTER(fc); diff --git a/paddle/fluid/inference/anakin/convert/test_flatten_op.cc b/paddle/fluid/inference/anakin/convert/test_flatten_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..d13281f11f03fdd75e585bce8b30e8780d81f7d7 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_flatten_op.cc @@ -0,0 +1,51 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +TEST(flatten_op, test) { + auto *converter = Registry::Global().Lookup("flatten"); + ASSERT_TRUE(converter); + + std::unordered_set parameters; + framework::Scope scope; + AnakinConvertValidation validator(parameters, &scope); + validator.DeclInputVar("flatten-X", {3, 10, 10, 4}); + validator.DeclOutputVar("flatten-Out", {3, 400, 1, 1}); + framework::OpDesc desc; + desc.SetType("flatten"); + desc.SetInput("X", {"flatten-X"}); + desc.SetOutput("Out", {"flatten-Out"}); + desc.SetAttr("axis", 1); + + LOG(INFO) << "set OP"; + validator.SetOp(*desc.Proto()); + LOG(INFO) << "execute"; + + validator.Execute(5); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(reshape); +USE_OP_ITSELF(flatten); +USE_ANAKIN_CONVERTER(flatten); diff --git a/paddle/fluid/inference/anakin/convert/test_im2sequence_op.cc b/paddle/fluid/inference/anakin/convert/test_im2sequence_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..5e5764633125c867e27b0b52e0e6ef18714653b2 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_im2sequence_op.cc @@ -0,0 +1,55 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/im2sequence.h" +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +TEST(im2sequence_op, native) { + std::unordered_set parameters; + framework::Scope scope; + AnakinConvertValidation validator(parameters, &scope); + + std::vector kernels = {6, 1}; + std::vector strides = {1, 1}; + std::vector paddings = {0, 0, 0, 0}; + + validator.DeclInputVar("x", {1, 1, 2, 2}); + validator.DeclOutputVar("out", {1, 1 * kernels[0] * kernels[1]}); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("im2sequence"); + desc.SetInput("X", {"x"}); + desc.SetOutput("Out", {"out"}); + + desc.SetAttr("kernels", kernels); + desc.SetAttr("strides", strides); + desc.SetAttr("paddings", paddings); + + validator.SetOp(*desc.Proto()); + validator.Execute(1); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(im2sequence); +USE_ANAKIN_CONVERTER(im2sequence); diff --git a/paddle/fluid/inference/anakin/convert/test_pool2d_op.cc b/paddle/fluid/inference/anakin/convert/test_pool2d_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..1ac019467721605c539c7ada452d04d5134fa341 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_pool2d_op.cc @@ -0,0 +1,119 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ +#include +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +void test_pool2d(bool global_pooling, bool ceil_mode, + std::string pool_type = "max") { + auto* pool2d_converter = + Registry::Global().Lookup("pool2d"); + ASSERT_TRUE(pool2d_converter); + + framework::Scope scope; + std::unordered_set parameters; + AnakinConvertValidation validator(parameters, &scope); + + // The ITensor's Dims should not contain the batch size. + // So, the ITensor's Dims of input and output should be C * H * W. + validator.DeclInputVar("pool2d_x", {1, 3, 6, 7}); + if (global_pooling) + validator.DeclOutputVar("pool2d_out", {1, 3, 1, 1}); + else if (ceil_mode) + validator.DeclOutputVar("pool2d_out", {1, 3, 3, 4}); + else + validator.DeclOutputVar("pool2d_out", {1, 3, 3, 3}); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("pool2d"); + desc.SetInput("X", {"pool2d_x"}); + desc.SetOutput("Out", {"pool2d_out"}); + + std::vector ksize({2, 2}); + std::vector strides({2, 2}); + std::vector paddings({0, 0}); + std::string pooling_t = pool_type; + + desc.SetAttr("pooling_type", pooling_t); + desc.SetAttr("ksize", ksize); + desc.SetAttr("strides", strides); + desc.SetAttr("paddings", paddings); + desc.SetAttr("global_pooling", global_pooling); + desc.SetAttr("ceil_mode", ceil_mode); + + LOG(INFO) << "set OP"; + validator.SetOp(*desc.Proto()); + LOG(INFO) << "execute"; + + validator.Execute(1); +} + +void test_pool2d2(bool global_pooling, bool ceil_mode, + std::string pool_type = "max") { + auto* pool2d_converter = + Registry::Global().Lookup("pool2d"); + ASSERT_TRUE(pool2d_converter); + + framework::Scope scope; + std::unordered_set parameters; + AnakinConvertValidation validator(parameters, &scope); + + // The ITensor's Dims should not contain the batch size. + // So, the ITensor's Dims of input and output should be C * H * W. + validator.DeclInputVar("pool2d_x", {1, 1, 17, 17}); + validator.DeclOutputVar("pool2d_out", {1, 1, 17, 17}); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("pool2d"); + desc.SetInput("X", {"pool2d_x"}); + desc.SetOutput("Out", {"pool2d_out"}); + + std::vector ksize({3, 3}); + std::vector strides({1, 1}); + std::vector paddings({1, 1}); + std::string pooling_t = pool_type; + + desc.SetAttr("pooling_type", pooling_t); + desc.SetAttr("ksize", ksize); + desc.SetAttr("strides", strides); + desc.SetAttr("paddings", paddings); + desc.SetAttr("global_pooling", global_pooling); + desc.SetAttr("ceil_mode", true); + + LOG(INFO) << "set OP"; + validator.SetOp(*desc.Proto()); + LOG(INFO) << "execute"; + + validator.Execute(1); +} + +TEST(Pool2dOpConverter, normal) { test_pool2d(false, false); } +TEST(Pool2dOpConverter, test_global_pooling) { test_pool2d(true, false); } + +TEST(Pool2dOpConverter, max_ceil_test) { test_pool2d(false, true); } +TEST(Pool2dOpConverter, avg_ceil_test) { test_pool2d(false, true, "avg"); } +TEST(Pool2dOpConverter, avg_ceil_test2) { test_pool2d2(false, true, "avg"); } + +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(pool2d); +USE_ANAKIN_CONVERTER(pool2d); diff --git a/paddle/fluid/inference/anakin/convert/test_relu_op.cc b/paddle/fluid/inference/anakin/convert/test_relu_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..04e624518a5a4477bbb41475b575f85be5a120d4 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_relu_op.cc @@ -0,0 +1,50 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/relu.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +static void test_activation_op(const std::string &op_type) { + auto *converter = Registry::Global().Lookup(op_type); + PADDLE_ENFORCE(converter != nullptr); + std::unordered_set parameters; + framework::Scope scope; + AnakinConvertValidation validator(parameters, &scope); + validator.DeclInputVar("act-X", {10, 6, 1, 1}); + validator.DeclOutputVar("act-Out", {10, 6, 1, 1}); + framework::OpDesc desc; + desc.SetType(op_type); + desc.SetInput("X", {"act-X"}); + desc.SetOutput("Out", {"act-Out"}); + + LOG(INFO) << "set OP"; + validator.SetOp(*desc.Proto()); + LOG(INFO) << "execute"; + + validator.Execute(5); +} + +TEST(sigm_op, test) { test_activation_op("relu"); } +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(relu); +USE_ANAKIN_CONVERTER(relu); diff --git a/paddle/fluid/inference/anakin/convert/test_reshape_op.cc b/paddle/fluid/inference/anakin/convert/test_reshape_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..306ebf510f29a87ca1ffa6df86e08f86b3f8ffbb --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_reshape_op.cc @@ -0,0 +1,74 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +TEST(reshape, test) { + auto* converter = Registry::Global().Lookup("reshape"); + ASSERT_TRUE(converter); + framework::Scope scope; + std::unordered_set parameters; + AnakinConvertValidation validator(parameters, &scope); + + // validator.DeclInputVar("reshape-X", {2, 3, 3, 1}); + // validator.DeclOutputVar("reshape-Out", {3, 2, 1, 3}); + validator.DeclInputVar("reshape-X", {1, 2, 4, 1}); + validator.DeclOutputVar("reshape-Out", {1, 8, 1, 1}); + + framework::OpDesc desc; + desc.SetType("reshape"); + desc.SetInput("X", {"reshape-X"}); + desc.SetOutput("Out", {"reshape-Out"}); + // desc.SetAttr("shape", std::vector({3, 2, 1, 3})); + desc.SetAttr("shape", std::vector({1, 8, 1, 1})); + + LOG(INFO) << "set OP"; + validator.SetOp(*desc.Proto()); + LOG(INFO) << "execute"; + validator.Execute(1); +} + +TEST(reshape, test2) { + framework::Scope scope; + std::unordered_set parameters; + AnakinConvertValidation validator(parameters, &scope); + + validator.DeclInputVar("reshape-X", {1, 2, 4}); + validator.DeclOutputVar("reshape-Out", {1, 4, 2}); + + framework::OpDesc desc; + desc.SetType("reshape"); + desc.SetInput("X", {"reshape-X"}); + desc.SetOutput("Out", {"reshape-Out"}); + // desc.SetAttr("shape", std::vector({3, 2, 1, 3})); + desc.SetAttr("shape", std::vector({0, -1, 2})); + + LOG(INFO) << "set OP"; + validator.SetOp(*desc.Proto()); + LOG(INFO) << "execute"; + validator.Execute(1); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(reshape); +USE_ANAKIN_CONVERTER(reshape); diff --git a/paddle/fluid/inference/anakin/convert/test_softmax_op.cc b/paddle/fluid/inference/anakin/convert/test_softmax_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..8c14fae0a67b9e488cf072535868a34f6195ab71 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_softmax_op.cc @@ -0,0 +1,49 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +TEST(softmax, test) { + auto* converter = Registry::Global().Lookup("softmax"); + ASSERT_TRUE(converter); + framework::Scope scope; + std::unordered_set parameters; + AnakinConvertValidation validator(parameters, &scope); + + validator.DeclInputVar("softmax-X", {1, 10, 2}); + validator.DeclOutputVar("softmax-Out", {1, 10, 2}); + + framework::OpDesc desc; + desc.SetType("softmax"); + desc.SetInput("X", {"softmax-X"}); + desc.SetOutput("Out", {"softmax-Out"}); + + LOG(INFO) << "set OP"; + validator.SetOp(*desc.Proto()); + LOG(INFO) << "execute"; + validator.Execute(1); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(softmax); +USE_ANAKIN_CONVERTER(softmax); diff --git a/paddle/fluid/inference/anakin/convert/test_split_op.cc b/paddle/fluid/inference/anakin/convert/test_split_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..aa61c01a511c2337944aadbbc3d47893487de683 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_split_op.cc @@ -0,0 +1,110 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/split.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +template +void AnakinSliceTest(const std::vector &in_shape, + const std::vector §ions) { + std::unordered_set parameters({""}); + framework::Scope scope; + AnakinConvertValidation validator(parameters, &scope); + + validator.DeclInputVar("split_input", in_shape); + std::vector output_vars; + for (size_t i = 0; i < sections.size(); ++i) { + auto out_shape = in_shape; + out_shape[Axis] = sections[i]; + std::string output_name = "split_out" + std::to_string(i); + validator.DeclOutputVar(output_name, out_shape); + output_vars.push_back(output_name); + } + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("split"); + desc.SetInput("X", {"split_input"}); + desc.SetOutput("Out", output_vars); + + desc.SetAttr("axis", Axis); + desc.SetAttr("num", 0); + desc.SetAttr("sections", sections); + + validator.SetOp(*desc.Proto()); + + validator.Execute(1); +} + +// batch = 0, axis = 1, same shape +TEST(split_op, test_same_shape_axis1_batch1) { + AnakinSliceTest<1>({1, 4, 2, 2}, {2, 2}); +} +// batch = 0, axis = 1, different shape +TEST(split_op, test_different_shape_axis1_batch1) { + AnakinSliceTest<1>({1, 3, 2, 2}, {2, 1}); +} +// batch = 10, axis = 1, same shape +TEST(split_op, test_same_shape_axis1_batch10) { + AnakinSliceTest<1>({1, 4, 2, 2}, {2, 2}); +} +// batch = 10, axis = 1, different shape +TEST(split_op, test_different_shape_axis1_batch10) { + AnakinSliceTest<1>({1, 3, 2, 2}, {2, 1}); +} +// batch = 0, axis = 2, same shape +TEST(split_op, test_same_shape_axis2_batch1) { + AnakinSliceTest<2>({1, 3, 4, 2}, {2, 2}); +} +// batch = 0, axis = 2, different shape +TEST(split_op, test_different_shape_axis2_batch1) { + AnakinSliceTest<2>({1, 3, 3, 2}, {2, 1}); +} +// batch = 10, axis = 2, same shape +TEST(split_op, test_same_shape_axis2_batch10) { + AnakinSliceTest<2>({1, 3, 4, 2}, {2, 2}); +} +// batch = 10, axis = 2, different shape +TEST(split_op, test_different_shape_axis2_batch10) { + AnakinSliceTest<2>({1, 3, 3, 2}, {2, 1}); +} +// batch = 0, axis = 3, same shape +TEST(split_op, test_same_shape_axis3_batch1) { + AnakinSliceTest<3>({1, 3, 2, 4}, {2, 2}); +} +// batch = 0, axis = 3, different shape +TEST(split_op, test_different_shape_axis3_batch1) { + AnakinSliceTest<3>({1, 3, 2, 3}, {2, 1}); +} +// batch = 10, axis = 3, same shape +TEST(split_op, test_same_shape_axis3_batch10) { + AnakinSliceTest<3>({1, 3, 2, 4}, {2, 2}); +} +// batch = 10, axis = 3, different shape +TEST(split_op, test_different_shape_axis3_batch10) { + AnakinSliceTest<3>({1, 3, 2, 3}, {2, 1}); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(split); +USE_ANAKIN_CONVERTER(split); diff --git a/paddle/fluid/inference/anakin/convert/test_sum_op.cc b/paddle/fluid/inference/anakin/convert/test_sum_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..d6a59a0166be9239b480221cc076069239403429 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_sum_op.cc @@ -0,0 +1,48 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/sum.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" +#include "paddle/fluid/operators/sum_op.h" + +namespace paddle { +namespace inference { +namespace anakin { + +TEST(sum, native) { + std::unordered_set parameters; + framework::Scope scope; + AnakinConvertValidation validator(parameters, &scope); + validator.DeclInputVar("sum_x1", {1, 2, 1, 2}); + validator.DeclInputVar("sum_x2", {1, 2, 1, 2}); + validator.DeclOutputVar("sum_out", {1, 2, 1, 2}); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("sum"); + desc.SetInput("X", {"sum_x1", "sum_x2"}); + desc.SetOutput("Out", {"sum_out"}); + + validator.SetOp(*desc.Proto()); + validator.Execute(1); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(sum); +USE_ANAKIN_CONVERTER(sum); diff --git a/paddle/fluid/inference/anakin/convert/test_transpose_op.cc b/paddle/fluid/inference/anakin/convert/test_transpose_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..016ed26f02f782fe5032d8368f7767a5c94dfe9f --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/test_transpose_op.cc @@ -0,0 +1,73 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace anakin { + +TEST(transpose_op, test) { + auto* converter = Registry::Global().Lookup("transpose"); + ASSERT_TRUE(converter != nullptr); + std::unordered_set parameters; + framework::Scope scope; + AnakinConvertValidation validator(parameters, &scope); + validator.DeclInputVar("transpose-X", {2, 3, 4, 5}); + validator.DeclOutputVar("transpose-Out", {4, 2, 5, 3}); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("transpose"); + desc.SetInput("X", {"transpose-X"}); + desc.SetOutput("Out", {"transpose-Out"}); + desc.SetAttr("axis", std::vector({2, 0, 3, 1})); + + LOG(INFO) << "set OP"; + validator.SetOp(*desc.Proto()); + LOG(INFO) << "execute"; + + validator.Execute(3); +} + +// test input shape's dims < 4 +TEST(transpose_op, test2) { + std::unordered_set parameters; + framework::Scope scope; + AnakinConvertValidation validator(parameters, &scope); + validator.DeclInputVar("transpose-X", {3, 4, 5}); + validator.DeclOutputVar("transpose-Out", {3, 5, 4}); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("transpose"); + desc.SetInput("X", {"transpose-X"}); + desc.SetOutput("Out", {"transpose-Out"}); + desc.SetAttr("axis", std::vector({0, 2, 1})); + + LOG(INFO) << "set OP"; + validator.SetOp(*desc.Proto()); + LOG(INFO) << "execute"; + + validator.Execute(1); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +USE_OP(transpose); +USE_ANAKIN_CONVERTER(transpose); diff --git a/paddle/fluid/inference/anakin/convert/transpose.cc b/paddle/fluid/inference/anakin/convert/transpose.cc new file mode 100644 index 0000000000000000000000000000000000000000..6a887401034f9d8c0b8b6aa3eeffb6579e395029 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/transpose.cc @@ -0,0 +1,55 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/convert/transpose.h" +#include +#include +#include + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::saber::NV; +using anakin::saber::Shape; +using anakin::PTuple; + +namespace paddle { +namespace inference { +namespace anakin { + +void TransposeOpConverter::operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) { + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto input = op_desc.Input("X").front(); + auto output = op_desc.Output("Out").front(); + auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front(); + engine_->AddOp(op_name, "Permute", {input}, {output}); + + auto axis = boost::get>(op_desc.GetAttr("axis")); + size_t axis_size = axis.size(); + while (axis.size() < 4) { + axis.push_back(axis_size); + axis_size += 1; + } + engine_->AddOpAttr>(op_name, "dims", axis); +} + +} // namespace anakin +} // namespace inference +} // namespace paddle + +REGISTER_ANAKIN_OP_CONVERTER(transpose, TransposeOpConverter); diff --git a/paddle/fluid/inference/anakin/convert/transpose.h b/paddle/fluid/inference/anakin/convert/transpose.h new file mode 100644 index 0000000000000000000000000000000000000000..62d26b6a9cc9885682f5750df32018596f014b33 --- /dev/null +++ b/paddle/fluid/inference/anakin/convert/transpose.h @@ -0,0 +1,35 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/inference/anakin/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace anakin { + +class TransposeOpConverter : public AnakinOpConverter { + public: + TransposeOpConverter() = default; + + virtual void operator()(const framework::proto::OpDesc &op, + const framework::Scope &scope, + bool test_mode) override; + virtual ~TransposeOpConverter() {} +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/convert/ut_helper.h b/paddle/fluid/inference/anakin/convert/ut_helper.h index 38d8e596a738ac98c9f9870473f72dcc72b0e7aa..e0371d95347a521f499dd9454d284907b3048a04 100644 --- a/paddle/fluid/inference/anakin/convert/ut_helper.h +++ b/paddle/fluid/inference/anakin/convert/ut_helper.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include #include #include #include @@ -24,6 +25,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/inference/anakin/convert/op_converter.h" #include "paddle/fluid/inference/anakin/engine.h" #include "paddle/fluid/inference/analysis/helper.h" #include "paddle/fluid/inference/utils/singleton.h" @@ -82,7 +84,7 @@ class AnakinConvertValidation { AnakinConvertValidation() = delete; AnakinConvertValidation(const std::unordered_set& parameters, - const framework::Scope& scope) + framework::Scope* scope) : parameters_(parameters), scope_(scope), place_(0) { PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0); engine_.reset(new AnakinEngine(true)); @@ -106,7 +108,7 @@ class AnakinConvertValidation { void DeclVar(const std::string& name, const std::vector dim_vec) { platform::CUDADeviceContext ctx(place_); - auto* x = scope_.Var(name); + auto* x = scope_->Var(name); auto* x_tensor = x->GetMutable(); x_tensor->Resize(framework::make_ddim(dim_vec)); RandomizeTensor(x_tensor, place_, ctx); @@ -118,15 +120,22 @@ class AnakinConvertValidation { // should init anakin engine here. Singleton::Global().ConvertOp( - desc, parameters_, scope_, engine_.get(), true /*test_mode*/); + desc, parameters_, *scope_, engine_.get(), true /*test_mode*/); engine_->Freeze(); + + std::map> temp_max_input_shape; for (const auto& input : op_desc_->InputArgumentNames()) { if (parameters_.count(input)) continue; - auto& t = inference::analysis::GetFromScope(scope_, + auto& t = inference::analysis::GetFromScope(*scope_, input); auto t_shape = framework::vectorize2int(t.dims()); + while (t_shape.size() < 4) { + t_shape.push_back(1); + } engine_->SetInputShape(input, t_shape); + temp_max_input_shape[input] = t_shape; } + engine_->SetMaxInputShape(temp_max_input_shape); engine_->Optimize(); engine_->InitGraph(); } @@ -138,14 +147,14 @@ class AnakinConvertValidation { std::unordered_set neglected_output = {}) { // Execute Fluid Op platform::CUDADeviceContext ctx(place_); - op_->Run(scope_, place_); + op_->Run(*scope_, place_); // std::vector input_vector; // std::vector output_vector; std::map inputs; for (const auto& input : op_desc_->InputArgumentNames()) { if (parameters_.count(input)) continue; - auto* var = scope_.FindVar(input); + auto* var = scope_->FindVar(input); auto tensor = var->GetMutable(); inputs.insert({input, tensor}); } @@ -155,45 +164,38 @@ class AnakinConvertValidation { for (const auto& output : op_desc_->OutputArgumentNames()) { if (neglected_output.count(output)) continue; std::vector fluid_out; - auto* var = scope_.FindVar(output); + auto* var = scope_->FindVar(output); auto tensor = var->GetMutable(); framework::TensorToVector(*tensor, ctx, &fluid_out); fluid_outputs.push_back(fluid_out); - // size_t fluid_out_size = fluid_out.size(); - /*for (size_t i = 0; i < fluid_out_size; i++) { - std::cout << fluid_out[i] << std::endl; - }*/ outputs.insert({output, tensor}); } - engine_->Execute(inputs, outputs); + engine_->Execute(inputs, outputs, stream_); int i_output = 0; for (const auto& output : op_desc_->OutputArgumentNames()) { if (neglected_output.count(output)) continue; std::vector anakin_out; - auto* var = scope_.FindVar(output); + auto* var = scope_->FindVar(output); auto tensor = var->GetMutable(); framework::TensorToVector(*tensor, ctx, &anakin_out); size_t anakin_out_size = anakin_out.size(); auto fluid_out = fluid_outputs[i_output++]; for (size_t i = 0; i < anakin_out_size; i++) { - LOG(INFO) << "Output[" << i << "]: anakin[" << anakin_out[i] << "], " - << "fluid[" << fluid_out[i] << "]"; + EXPECT_LT(std::abs(fluid_out[i] - anakin_out[i]), 1e-3); } } } - framework::Scope& scope() { return scope_; } - private: std::unique_ptr engine_{nullptr}; cudaStream_t stream_; std::unique_ptr op_; std::unique_ptr op_desc_; const std::unordered_set& parameters_; - framework::Scope& scope_; + framework::Scope* scope_; platform::CUDAPlace place_; }; diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc index 6549991474f4834f0c3ef74c60d294cca6bebc91..ccf78ad7e56306d24af829c45c888021f4e3fbc4 100644 --- a/paddle/fluid/inference/anakin/engine.cc +++ b/paddle/fluid/inference/anakin/engine.cc @@ -33,9 +33,15 @@ namespace inference { namespace anakin { template -AnakinEngine::AnakinEngine(bool need_summary) +AnakinEngine::AnakinEngine( + bool need_summary, int device, int max_batch_size, + std::map> max_input_shape) : graph_(new AnakinGraphT()), - net_(new AnakinNetT(need_summary)) {} + net_(new AnakinNetT(need_summary)) { + device_ = device; + max_batch_size_ = max_batch_size; + max_input_shape_ = max_input_shape; +} template AnakinEngine::~AnakinEngine() {} @@ -63,34 +69,53 @@ void AnakinEngine::AddOp( template void AnakinEngine::Execute( const std::map &inputs, - const std::map &outputs) { + const std::map &outputs, + cudaStream_t stream) { + cudaDeviceSynchronize(); for (const auto &input : inputs) { auto *tensor = input.second; auto *data = tensor->data(); - auto shape = framework::vectorize2int(tensor->dims()); - ::anakin::saber::Shape anakin_shape(shape); + + auto fluid_input_shape = framework::vectorize2int(tensor->dims()); + while (fluid_input_shape.size() < 4) { + fluid_input_shape.push_back(1); + } auto *anakin_input = net_->get_in(input.first); + std::vector max_input_shape = max_input_shape_[input.first]; + int max_shape_sum = + std::accumulate(max_input_shape.begin(), max_input_shape.end(), 1, + std::multiplies()); + + PADDLE_ENFORCE(max_shape_sum >= tensor->numel(), + "The anakin input max shape should be greater than" + " or equal to the real input shape, Please set the max " + "input shape using EnableAnakinEngine"); + anakin_input->reshape(fluid_input_shape); + ::anakin::saber::Tensor tmp_anakin_tensor(data, TargetT(), 0, - anakin_shape); - anakin_input->share_from(tmp_anakin_tensor); + fluid_input_shape); + anakin_input->copy_from(tmp_anakin_tensor); } - + net_->prediction(); + cudaDeviceSynchronize(); for (const auto &output : outputs) { + platform::CUDAPlace gpu_place(device_); auto *tensor = output.second; - auto *data = tensor->data(); - auto shape = framework::vectorize2int(tensor->dims()); - ::anakin::saber::Shape anakin_shape(shape); auto *anakin_output = net_->get_out(output.first); - ::anakin::saber::Tensor tmp_anakin_tensor(data, TargetT(), 0, - anakin_shape); - anakin_output->share_from(tmp_anakin_tensor); + auto *anakin_data = anakin_output->data(); + auto anakin_output_shape = anakin_output->valid_shape(); + tensor->Resize(framework::make_ddim(anakin_output_shape)); + auto *fluid_data = tensor->mutable_data(gpu_place); + memory::Copy(gpu_place, static_cast(fluid_data), gpu_place, + static_cast(anakin_data), + tensor->numel() * sizeof(float), stream); } - net_->prediction(); + cudaDeviceSynchronize(); } template void AnakinEngine::Freeze() { - PADDLE_ENFORCE(graph_->Freeze(), "Freeze anakin subgraph."); + PADDLE_ENFORCE(graph_->Freeze_v3(), "Freeze anakin subgraph."); } template diff --git a/paddle/fluid/inference/anakin/engine.h b/paddle/fluid/inference/anakin/engine.h index d8f32f57be5aabb91ba720c6457a03f15083db43..4845ffdf5b9dcfa99d1f421d47328beb4b196298 100644 --- a/paddle/fluid/inference/anakin/engine.h +++ b/paddle/fluid/inference/anakin/engine.h @@ -15,9 +15,11 @@ #pragma once #include +#include #include #include #include +#include #include #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/inference/engine.h" @@ -26,8 +28,12 @@ #include "framework/core/net/net.h" #include "framework/core/types.h" #include "framework/graph/graph.h" +#include "framework/graph/graph_global_mem.h" #include "saber/saber_types.h" +using anakin::Precision; +using anakin::saber::NV; + namespace anakin { template @@ -46,8 +52,13 @@ namespace anakin { template class AnakinEngine { + using NetT = ::anakin::Net; + using GraphT = ::anakin::graph::Graph; + public: - explicit AnakinEngine(bool need_summary = false); + explicit AnakinEngine( + bool need_summary = false, int device = 0, int max_batch_size = 1, + std::map> max_input_shape = {}); ~AnakinEngine(); void InitGraph(); void SetInputShape(const std::string &name, std::vector shape); @@ -61,20 +72,72 @@ class AnakinEngine { PADDLE_ENFORCE(graph_->AddOpAttr(op_name, attr_name, attr_value), "Add operation's attribution."); } - + NetT *Net() { return net_.get(); } + GraphT *Graph() { return graph_.get(); } std::unique_ptr Clone(); + const std::map> &GetMaxInputShape() { + return max_input_shape_; + } + void SetMaxInputShape(std::map> shape) { + max_input_shape_ = shape; + } + int GetMaxBatchSize() { return max_batch_size_; } void Freeze(); void Optimize(); + void AllocTmpMem() { + PADDLE_ENFORCE(net_->alloc_memory_first(*graph_), + "anakin alloc temp memory first failed"); + } + void Save(std::string path) { graph_->save(path); } + + bool IsInit() { return initialized_; } + int GetDevice() { return device_; } void Execute(const std::map &inputs, - const std::map &outputs); + const std::map &outputs, + cudaStream_t stream); private: - using NetT = ::anakin::Net; - using GraphT = ::anakin::graph::Graph; + bool initialized_{false}; + int max_batch_size_; + std::map> max_input_shape_; + int device_; std::unique_ptr graph_; std::unique_ptr net_; }; +class AnakinEngineManager { + using AnakinNvEngineT = AnakinEngine; + + public: + bool HasEngine(const std::string &name) const { + if (engines_.count(name) == 0) return false; + return engines_.at(name).get() != nullptr; + } + AnakinNvEngineT *Get(const std::string &name) const { + return engines_.at(name).get(); + } + + AnakinNvEngineT *Create( + bool need_summary, int device, int max_batch_size, + std::map> max_input_shape, + std::string engine_name) { + std::unique_lock lk(mut_); + auto *p = new AnakinEngine( + need_summary, device, max_batch_size, max_input_shape); + engines_[engine_name].reset(p); + return p; + } + + void DeleteALL() { + for (auto &item : engines_) { + item.second.reset(nullptr); + } + } + + private: + std::unordered_map> engines_; + std::mutex mut_; +}; } // namespace anakin } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/anakin/op_teller.cc b/paddle/fluid/inference/anakin/op_teller.cc new file mode 100644 index 0000000000000000000000000000000000000000..90cf021de2f9d365fd1fa21f7d189d3fcd9d3ab2 --- /dev/null +++ b/paddle/fluid/inference/anakin/op_teller.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/anakin/op_teller.h" + +namespace paddle { +namespace inference { +namespace anakin { + +// Just tell by the op_types. +struct SimpleOpTypeSetTeller : public Teller { + SimpleOpTypeSetTeller() { + teller_set.insert("mul"); + teller_set.insert("fc"); + teller_set.insert("conv2d_fusion"); + teller_set.insert("split"); + teller_set.insert("relu"); + teller_set.insert("pool2d"); + teller_set.insert("elementwise_add"); + teller_set.insert("elementwise_mul"); + teller_set.insert("concat"); + teller_set.insert("tanh"); + teller_set.insert("conv2d"); + teller_set.insert("batch_norm"); + teller_set.insert("softmax"); + teller_set.insert("flatten2"); + teller_set.insert("reshape2"); + teller_set.insert("transpose2"); + teller_set.insert("density_prior_box"); + teller_set.insert("detection_out"); + teller_set.insert("dropout"); + teller_set.insert("sigmoid"); + teller_set.insert("sum"); + } + + bool operator()(const std::string& op_type, + const framework::OpDesc& desc) override { + return teller_set.count(op_type); + } + + private: + std::unordered_set teller_set; +}; + +bool OpTeller::Tell(const std::string& op_type, const framework::OpDesc& desc) { + for (auto& teller : tellers_) { + if ((*teller)(op_type, desc)) return true; + } + return false; +} + +OpTeller::OpTeller() { tellers_.emplace_back(new SimpleOpTypeSetTeller); } + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/op_teller.h b/paddle/fluid/inference/anakin/op_teller.h new file mode 100644 index 0000000000000000000000000000000000000000..15a42067b8438e60851a50e454abde95782d90ee --- /dev/null +++ b/paddle/fluid/inference/anakin/op_teller.h @@ -0,0 +1,70 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include +#include +#include "paddle/fluid/framework/op_desc.h" + +namespace paddle { +namespace inference { +namespace anakin { + +/* + * Single Op teller definition. + * One can override this and define a more complex tell logic, considerring more + * issues such as op_desc. + */ +struct Teller { + virtual bool operator()(const std::string& op_type, + const framework::OpDesc& desc) = 0; + + virtual ~Teller() = default; +}; +/* + * A real example: + * + * struct SomeTeller : public Teller { + * bool operator()(const std::string& op_type, + * const framework::OpDesc& desc) override { + * return op_type == "fc" && desc.Inputs().size() == 2; + * } + *}; + */ + +/* + * class OpTeller helps to tell whether a fluid + * operator can be transformed to a TensorRT layer. + */ +class OpTeller { + public: + static OpTeller& Global() { + static std::unique_ptr x(new OpTeller); + return *x; + } + + bool Tell(const std::string& op_type, const framework::OpDesc& desc); + + private: + OpTeller(); + + private: + std::vector> tellers_; +}; + +} // namespace anakin +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/anakin/test_anakin_engine.cc b/paddle/fluid/inference/anakin/test_anakin_engine.cc index 571294d3e22fb9489686bfcb2f3a64198099f970..8fd6b8bec9ada6dec67fd24a2457713203431ebf 100644 --- a/paddle/fluid/inference/anakin/test_anakin_engine.cc +++ b/paddle/fluid/inference/anakin/test_anakin_engine.cc @@ -17,9 +17,6 @@ limitations under the License. */ #include -#include "framework/core/net/net.h" -#include "framework/graph/graph.h" -#include "framework/graph/graph_global_mem.h" #include "paddle/fluid/inference/anakin/engine.h" using anakin::graph::GraphGlobalMem; @@ -84,7 +81,9 @@ TEST_F(TestAnakinEngine, Execute) { auto *y_data = y.mutable_data(platform::CUDAPlace()); std::map outputs = {{"y", &y}}; - engine_->Execute(inputs, outputs); + cudaStream_t stream; + + engine_->Execute(inputs, outputs, stream); auto *y_data_gpu = y_data; float y_data_cpu[2]; cudaMemcpy(y_data_cpu, y_data_gpu, sizeof(float) * 2, cudaMemcpyDeviceToHost); diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index 78cc1e9688359cb82d710ea9e9dba972bf9a7516..29f16943e0c13fbe080e8e073b081583f1d14d11 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -23,6 +23,7 @@ #pragma once +#include #include #include #include @@ -58,6 +59,8 @@ struct Argument { using unique_ptr_t = std::unique_ptr>; using fusion_statis_t = std::unordered_map; + using engine_opt_info_t = std::map; + using anakin_max_shape_t = std::map>; bool Has(const std::string& key) const { return valid_fields_.count(key); } @@ -110,12 +113,14 @@ struct Argument { private: \ unique_ptr_t field__##_; + DECL_ARGUMENT_FIELD(predictor_id, PredictorID, int); // Model path DECL_ARGUMENT_FIELD(model_dir, ModelDir, std::string); // Model specified with program and parameters files. DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string); DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string); DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool); + DECL_ARGUMENT_FIELD(engine_opt_info, EngineOptInfo, engine_opt_info_t); // The overall graph to work on. DECL_ARGUMENT_UNIQUE_FIELD(main_graph, MainGraph, framework::ir::Graph); @@ -160,6 +165,11 @@ struct Argument { DECL_ARGUMENT_FIELD(tensorrt_use_static_engine, TensorRtUseStaticEngine, bool); + DECL_ARGUMENT_FIELD(anakin_max_input_shape, AnakinMaxInputShape, + anakin_max_shape_t); + DECL_ARGUMENT_FIELD(anakin_max_batch_size, AnakinMaxBatchSize, int); + DECL_ARGUMENT_FIELD(use_anakin, UseAnakin, bool); + // Memory optimized related. DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool); DECL_ARGUMENT_FIELD(static_memory_optim, StaticMemoryOptim, bool); diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index c7960004665381aa916a08f57fcaa7c56781a1af..7a96ac11d8ef754f38070862a70744947412882b 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -13,9 +13,12 @@ // limitations under the License. #include "paddle/fluid/inference/analysis/ir_pass_manager.h" +#include +#include #include #include #include +#include #include #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" @@ -85,16 +88,40 @@ void IRPassManager::CreatePasses(Argument *argument, AnalysisConfig::Precision::kInt8; pass->Set("enable_int8", new bool(enable_int8)); - std::string model_opt_cache_dir = - argument->Has("model_dir") - ? argument->model_dir() - : GetDirRoot(argument->model_program_path()); - pass->Set( - "model_opt_cache_dir", - new std::string(GetOrCreateModelOptCacheDir(model_opt_cache_dir))); + + bool use_static_engine = argument->tensorrt_use_static_engine(); + bool model_from_memory = argument->model_from_memory(); + bool int8_valid = !(model_from_memory && enable_int8); + PADDLE_ENFORCE(int8_valid, + "TRT INT8 Now don't support model load from memory."); + + if ((!model_from_memory && use_static_engine) || enable_int8) { + std::string model_opt_cache_dir = + argument->Has("model_dir") + ? argument->model_dir() + : GetDirRoot(argument->model_program_path()); + pass->Set( + "model_opt_cache_dir", + new std::string(GetOrCreateModelOptCacheDir(model_opt_cache_dir))); + } + pass->Set("gpu_device_id", new int(argument->gpu_device_id())); + pass->Set("use_static_engine", new bool(use_static_engine)); + pass->Set("model_from_memory", new bool(argument->model_from_memory())); + pass->Set("engine_opt_info", new std::map( + argument->engine_opt_info())); + } + + if (pass_name == "anakin_subgraph_pass") { + pass->Set("program", + new framework::ProgramDesc *(&argument->main_program())); pass->Set("gpu_device_id", new int(argument->gpu_device_id())); - pass->Set("use_static_engine", - new bool(argument->tensorrt_use_static_engine())); + pass->Set("model_from_memory", new bool(argument->model_from_memory())); + pass->Set("engine_opt_info", new std::map( + argument->engine_opt_info())); + pass->Set("predictor_id", new int(argument->predictor_id())); + pass->Set("max_input_shape", new std::map>( + argument->anakin_max_input_shape())); + pass->Set("max_batch_size", new int(argument->anakin_max_batch_size())); } pre_pass = pass_name; diff --git a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt index 410a90132aa7657a23b858570763547fe53730a0..05a3d7ddfdb08c98866cc0a08ec4113866c7567d 100644 --- a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt @@ -1,4 +1,4 @@ -cc_library(subgraph_detector SRCS subgraph_detector.cc DEPS proto_desc) +cc_library(subgraph_detector SRCS subgraph_detector.cc subgraph_util.cc DEPS proto_desc) if(WITH_TESTING) add_dependencies(subgraph_detector gtest) endif() @@ -14,3 +14,15 @@ if (WITH_GPU AND TENSORRT_FOUND) file(APPEND ${pass_file} "USE_PASS(tensorrt_subgraph_pass);\n") set(INFER_IR_PASSES ${INFER_IR_PASSES} tensorrt_subgraph_pass CACHE INTERNAL "") endif() + +if (ANAKIN_FOUND) + cc_library(anakin_subgraph_pass SRCS anakin_subgraph_pass.cc DEPS subgraph_detector anakin_op_teller) + + set(analysis_deps ${analysis_deps} + subgraph_detector anakin_subgraph_pass + CACHE INTERNAL "") + + set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h) + file(APPEND ${pass_file} "USE_PASS(anakin_subgraph_pass);\n") + set(INFER_IR_PASSES ${INFER_IR_PASSES} anakin_subgraph_pass CACHE INTERNAL "") +endif() diff --git a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..12deed2533bba713701849d58f8c5cf3269b85da --- /dev/null +++ b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.cc @@ -0,0 +1,219 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/op_teller.h" +#include "paddle/fluid/inference/analysis/helper.h" +#include "paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h" +#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h" +#include "paddle/fluid/string/pretty_log.h" + +namespace paddle { +namespace inference { +namespace analysis { + +using framework::ir::Node; + +std::unique_ptr analysis::AnakinSubgraphPass::ApplyImpl( + std::unique_ptr graph) const { + framework::ir::FusePassBase::Init("anakin_subgraph_pass", graph.get()); + + auto teller = [](const framework::ir::Node *node) { + if (!node->IsOp() || !node->Op()) return false; + return anakin::OpTeller::Global().Tell(node->Op()->Type(), *node->Op()); + }; + + SubGraphFuser fuser(graph.get(), teller, 6 /* min_subgraph_size */); + fuser(); + + std::vector graph_param_names = + ExtractParameters(graph->Nodes()); + + // those parameter already exist in anakin, and should not have another copy + // in fluid. + std::vector repetitive_params; + + for (auto *node : graph->Nodes()) { + if (node->IsOp() && !Agent(node).subgraph()->empty()) { + CreateAnakinOp(node, graph.get(), graph_param_names, &repetitive_params); + std::unordered_set nodes2remove( + Agent(node).subgraph()->begin(), Agent(node).subgraph()->end()); + framework::ir::GraphSafeRemoveNodes(graph.get(), nodes2remove); + } + } + + std::unordered_set nodes2remove; + for (auto *node : graph->Nodes()) { + if (node->IsOp() && Agent(node).deleted()) { + nodes2remove.insert(node); + } + } + framework::ir::GraphSafeRemoveNodes(graph.get(), nodes2remove); + graph->Set(framework::ir::kRepetitiveParamAttr, + new std::vector(repetitive_params)); + + return graph; +} + +std::string GenerateAnakinEngineKey(const std::set &engine_inputs, + const std::set &engine_outputs, + std::string id) { + std::string engine_hash_key = ""; + for (auto name : engine_inputs) { + engine_hash_key += name; + } + for (auto name : engine_outputs) { + engine_hash_key += name; + } + engine_hash_key += id; + auto engine_key = std::to_string(std::hash()(engine_hash_key)); + return engine_key; +} + +void AnakinSubgraphPass::CreateAnakinOp( + framework::ir::Node *node, Graph *graph, + const std::vector &graph_params, + std::vector *repetitive_params) const { + auto *op_desc = node->Op(); + auto &subgraph = *Agent(node).subgraph(); + PADDLE_ENFORCE(!subgraph.empty()); + + framework::ProgramDesc *program_desc = + Get("program"); + // Add new block for TensorRTEngineOP + const framework::BlockDesc &main_block = + program_desc->Block(framework::kRootBlockIndex); + // const framework::BlockDesc& main_block = program_desc->Block(0); + framework::BlockDesc *new_block = program_desc->AppendBlock(main_block); + + // An fake block desc. + framework::proto::BlockDesc block_proto; + framework::BlockDesc block_desc(nullptr, &block_proto); + block_desc.Proto()->set_parent_idx(-1); + block_desc.Proto()->set_idx(0); + string::PrettyLogDetail("--- detect a sub-graph with %d nodes", + subgraph.size()); + + for (auto *node : subgraph) { + auto *new_block_op = new_block->AppendOp(); + auto *op = block_desc.AppendOp(); + *new_block_op->Proto() = *node->Op()->Proto(); + *op->Proto() = *node->Op()->Proto(); + } + + // Then, we will use the input_names_with_id and output_names_with_id to + // generate the eigine key. + // So, We use set instead of unordered_set here to ensure that the engine key + // is unique. + std::set input_names; + std::set input_names_with_id; + std::vector params; + for (auto *x : node->inputs) { + input_names.insert(x->Name()); + input_names_with_id.insert(x->Name() + std::to_string(x->id())); + if (std::count(graph_params.begin(), graph_params.end(), x->Name()) > 0) { + params.push_back(x->Name()); + } + } + std::copy(params.begin(), params.end(), + std::back_inserter(*repetitive_params)); + op_desc->SetInput( + "Xs", std::vector(input_names.begin(), input_names.end())); + + std::set output_names; + std::set output_names_with_id; + for (auto *x : node->outputs) { + output_names.insert(x->Name()); + output_names_with_id.insert(x->Name() + std::to_string(x->id())); + } + + op_desc->SetOutput( + "Ys", std::vector(output_names.begin(), output_names.end())); + op_desc->SetType("anakin_engine"); + + std::unordered_map output_name_map; + auto &subgraph_nodes = *Agent(node).subgraph(); + + // The following procedure is used to rename all the intermediate + // variables and the output variables of the subgraph. + RenameAndGetOutputs(subgraph_nodes, &block_desc, input_names_with_id, + &output_names_with_id, &output_names, &output_name_map, + false); + + // When anakin engine runs at the end of the operation, + // output_mapping help us copy the data from the renamed ITensor + // to Tensor. + std::vector output_mapping; + for (auto name : output_names) { + PADDLE_ENFORCE(output_name_map.count(name) != 0); + output_mapping.push_back(output_name_map[name]); + } + + auto *vars = block_desc.Proto()->mutable_vars(); + for (framework::ir::Node *node : graph->Nodes()) { + if (node->IsVar() && node->Var()) { + *vars->Add() = *node->Var()->Proto(); + } + } + + PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(), + "the block has no var-desc"); + PADDLE_ENFORCE(!output_mapping.empty()); + op_desc->SetBlockAttr("sub_block", new_block); + SetAttr(op_desc->Proto(), "subgraph", + block_desc.Proto()->SerializeAsString()); + // Set attrs + SetAttr(op_desc->Proto(), "parameters", params); + SetAttr(op_desc->Proto(), "output_name_mapping", output_mapping); + int predictor_id = Get("predictor_id"); + auto engine_key = GenerateAnakinEngineKey( + input_names_with_id, output_names_with_id, std::to_string(predictor_id)); + + SetAttr(op_desc->Proto(), "engine_key", engine_key); + auto max_input_shape = + Get>>("max_input_shape"); + auto max_batch_size = Get("max_batch_size"); + + auto *anakin_engine = + inference::Singleton::Global().Create( + true, Get("gpu_device_id"), max_batch_size, max_input_shape, + engine_key); + + auto *scope = param_scope(); + std::unordered_set param_set(params.begin(), params.end()); + framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto()); + + inference::Singleton::Global() + .ConvertBlockToAnakinEngine( + &block_desc_temp, scope, + std::vector(input_names.begin(), input_names.end()), + param_set, output_mapping, anakin_engine); +} + +} // namespace analysis +} // namespace inference +} // namespace paddle + +REGISTER_PASS(anakin_subgraph_pass, + paddle::inference::analysis::AnakinSubgraphPass); diff --git a/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..c13b9ecda42336a79187185070104ba9ac4b67bc --- /dev/null +++ b/paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h @@ -0,0 +1,44 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include +#include +#include "paddle/fluid/framework/ir/pass.h" +#include "paddle/fluid/inference/anakin/engine.h" +#include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h" + +using anakin::Precision; +using anakin::saber::NV; +namespace paddle { +namespace inference { +namespace analysis { + +class AnakinSubgraphPass : public framework::ir::FusePassBase { + public: + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; + + private: + void CreateAnakinOp(framework::ir::Node *x, framework::ir::Graph *graph, + const std::vector &graph_params, + std::vector *repetitive_params) const; + void CleanIntermediateOutputs(framework::ir::Node *node); +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc b/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc index 96befe7f8a5d16402338ac337daa96d714b4d310..76b1671601eec95d64b36effc5727481dcd070e2 100644 --- a/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc @@ -14,6 +14,8 @@ limitations under the License. */ #include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h" #include +#include +#include #include #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -418,7 +420,7 @@ void SubGraphFuser::ReplaceNodesWithSubGraphs() { // Node that contains this subgraph 2. Mark the nodes inside the sub-graph // as deleted. 3. Replace the deleted node with the new Block Node. framework::OpDesc empty_desc; - empty_desc.SetType("tensorrt_engine"); + empty_desc.SetType("anakin_engine"); auto *block_node = graph_->CreateOpNode(&empty_desc); Agent(block_node).set_subgraph({}); auto io = ExtractInputAndOutputOfSubGraph(subgraph); diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc new file mode 100644 index 0000000000000000000000000000000000000000..a17ee1b707a7f950cddc62373a9a57c793d5528f --- /dev/null +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc @@ -0,0 +1,152 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +/* + * This file defines the the class to partition a graph. + */ + +#include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h" +#include +#include + +namespace paddle { +namespace inference { +namespace analysis { +using framework::ir::Node; + +std::vector ExtractParameters( + const std::unordered_set &nodes) { + // We can judge whether a variable is a parameter by + // its presistable property, but sometimes the presistable + // of the feed op output is true, so we have to identify it. + std::vector feed_outputs; + for (const auto &node : nodes) { + if (!node->IsOp()) continue; + std::string op_type = node->Op()->Type(); + if (op_type == "feed" || op_type == "fetch") { + std::vector output_names = node->Op()->OutputArgumentNames(); + std::copy(output_names.begin(), output_names.end(), + std::back_inserter(feed_outputs)); + } + } + + std::vector parameters; + for (const auto &node : nodes) { + if (!node->IsVar()) continue; + if (node->Var()->Persistable() && + std::find(feed_outputs.begin(), feed_outputs.end(), node->Name()) == + feed_outputs.end()) { + parameters.push_back(node->Name()); + } + } + return parameters; +} + +void RenameAndGetOutputs( + const std::vector &subgraph_nodes, + framework::BlockDesc *block_desc, + const std::set &input_names_with_id, + std::set *output_names_with_id, + std::set *output_names, + std::unordered_map *output_name_map, + bool is_trt) { + //// In the normal case, the paddle-trt exists bug when runing the googlenet. + // When there are more than two convolutions of 1 * 1 with the same input, the + // paddle-tensorrt will do the merging optimization, which fuse those conv + // into one conv, and then trigger bug. So, We should use strategy to avoid + // this optimization for the time being. This bug will be fixed in the future. + std::unordered_map + same_hierarchy_conv2d_num_map; + + for (size_t index = 0; index < block_desc->OpSize(); ++index) { + framework::proto::OpDesc *op = block_desc->Op(index)->Proto(); + framework::OpDesc op_desc(*op, nullptr); + auto correspond_node = subgraph_nodes[index]; + PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type()); + + std::unordered_map var2id; + std::unordered_map in_vars; + for (auto *in_var : correspond_node->inputs) { + var2id[in_var->Name()] = in_var->id(); + in_vars[in_var->Name()] = in_var; + } + // rename for the input variables of op inside subgraph + for (int i = 0; i < op->inputs_size(); i++) { + // one input + auto *in_var = op->mutable_inputs(i); + std::vector replaced_names; + for (int k = 0; k < in_var->arguments_size(); k++) { // all the arguments + std::string arg_value = in_var->arguments(k); + std::string arg_value_with_id = + arg_value + std::to_string(var2id[arg_value]); + if (input_names_with_id.count(arg_value_with_id)) { + replaced_names.push_back(arg_value); + } else { + replaced_names.push_back(arg_value_with_id); + } + } + in_var->clear_arguments(); + for (size_t k = 0; k < replaced_names.size(); k++) { + in_var->add_arguments(replaced_names[k]); + } + } + var2id.clear(); + for (auto out_var : correspond_node->outputs) { + var2id[out_var->Name()] = out_var->id(); + } + + if (op_desc.Type() == "conv2d" && is_trt) { + auto input_var_name = op_desc.Input("Input").front(); + auto filter_var_name = op_desc.Input("Filter").front(); + auto out_var_name = op_desc.Output("Output").front(); + auto filter_shape = in_vars[filter_var_name]->Var()->GetShape(); + const std::vector strides = + boost::get>(op_desc.GetAttr("strides")); + const std::vector paddings = + boost::get>(op_desc.GetAttr("paddings")); + if (same_hierarchy_conv2d_num_map[input_var_name] > 0) { + (*output_names_with_id) + .insert(out_var_name + std::to_string(var2id[out_var_name])); + (*output_names).insert(out_var_name); + } else if (filter_shape[2] == 1 && filter_shape[3] == 1 && + strides[0] == 1 && strides[1] == 1 && paddings[0] == 0 && + paddings[1] == 0) { + same_hierarchy_conv2d_num_map[input_var_name] += 1; + } + } + + // rename for the output variables of op inside subgraph + for (int i = 0; i < op->outputs_size(); i++) { + framework::proto::OpDesc_Var *out_var = op->mutable_outputs(i); + std::vector replaced_names; + for (int k = 0; k < out_var->arguments_size(); k++) { + std::string arg_value = out_var->arguments(k); + std::string arg_value_with_id = + arg_value + std::to_string(var2id[arg_value]); + if (output_names_with_id->count(arg_value_with_id)) { + (*output_name_map)[arg_value] = arg_value_with_id; + } + replaced_names.push_back(arg_value_with_id); + } + out_var->clear_arguments(); + for (size_t k = 0; k < replaced_names.size(); k++) { + out_var->add_arguments(replaced_names[k]); + } + } + } +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h new file mode 100644 index 0000000000000000000000000000000000000000..3cf21bf5f426a7142626e6ae1db6ee478418d08a --- /dev/null +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h @@ -0,0 +1,49 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +/* + * This file defines the the class to partition a graph. + */ + +#pragma once +#include +#include +#include +#include +#include + +#include "paddle/fluid/framework/ir/graph.h" +#include "paddle/fluid/framework/ir/graph_traits.h" +#include "paddle/fluid/framework/ir/node.h" + +namespace paddle { +namespace inference { +namespace analysis { +using framework::ir::Node; + +std::vector ExtractParameters( + const std::unordered_set &nodes); + +void RenameAndGetOutputs( + const std::vector &subgraph_nodes, + framework::BlockDesc *block_desc, + const std::set &input_names_with_id, + std::set *output_names_with_id, + std::set *output_names, + std::unordered_map *output_name_map, + bool is_trt = true); + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc index d4e2da8957f2057b21460d00b71e9717c63ed054..59399403276b59c143fc3e06a53643e0a85cf559 100644 --- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -30,17 +31,6 @@ namespace analysis { using framework::ir::Node; -std::vector ExtractParameters( - const std::unordered_set &nodes); - -void RenameAndGetOutputs( - const std::vector &subgraph_nodes, - framework::BlockDesc *block_desc, - const std::set &input_names_with_id, - std::set *output_names_with_id, - std::set *output_names, - std::unordered_map *output_name_map); - std::unique_ptr analysis::TensorRtSubgraphPass::ApplyImpl( std::unique_ptr graph) const { framework::ir::FusePassBase::Init("tensorrt_subgraph_pass", graph.get()); @@ -209,186 +199,86 @@ void TensorRtSubgraphPass::CreateTensorRTOp( SetAttr(op_desc->Proto(), "parameters", params); auto enable_int8 = Get("enable_int8"); + auto use_static_engine = Get("use_static_engine"); auto engine_key = GenerateEngineKey(input_names_with_id, output_names_with_id, std::to_string(0)); // Get "" when there is no cached calibration table data. - std::string calibration_data = GetTrtCalibTableData( - Get("model_opt_cache_dir"), engine_key, enable_int8); + bool load_from_memory = Get("model_from_memory"); + std::string calibration_data = ""; + if (enable_int8) { + calibration_data = GetTrtCalibTableData( + Get("model_opt_cache_dir"), engine_key, enable_int8); + } SetAttr(op_desc->Proto(), "calibration_data", calibration_data); SetAttr(op_desc->Proto(), "enable_int8", enable_int8); SetAttr(op_desc->Proto(), "engine_key", engine_key); - SetAttr(op_desc->Proto(), "engine_serialized_data", std::string("")); + std::string trt_engine_serialized_data = ""; + + SetAttr(op_desc->Proto(), "engine_serialized_data", + trt_engine_serialized_data); std::unique_ptr calibrator; if (enable_int8 && calibration_data.size() != 0) { calibrator.reset(new tensorrt::TRTInt8Calibrator(calibration_data)); } - - bool use_static_engine = Get("use_static_engine"); // When in int8 mode and calibration_mode, the program just produce the // calibration table data. bool calibration_mode = (enable_int8 && calibration_data.size() == 0); - if (!calibration_mode && use_static_engine) { - std::copy(params.begin(), params.end(), - std::back_inserter(*repetitive_params)); - std::string trt_engine_serialized_data = GetTrtEngineSerializedData( - Get("model_opt_cache_dir"), engine_key); + if (calibration_mode) { + // calibraion mode means generate int8 calibration table data process. + return; + } - if (trt_engine_serialized_data.empty()) { - LOG(INFO) << "Prepare TRT engine (Optimize model structure, Select OP " - "kernel etc). This process may cost a lot of time."; - std::unique_ptr trt_engine( - new tensorrt::TensorRTEngine( - Get("max_batch_size"), Get("workspace_size"), - enable_int8, calibrator.get(), Get("gpu_device_id"))); - auto *scope = param_scope(); - framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto()); - std::unordered_set param_set(params.begin(), params.end()); - inference::Singleton::Global() - .ConvertBlockToTRTEngine( - &block_desc_temp, *scope, - std::vector(input_names.begin(), input_names.end()), - param_set, output_mapping, trt_engine.get()); - nvinfer1::IHostMemory *serialized_engine_data = trt_engine->Serialize(); - trt_engine_serialized_data = - std::string((const char *)serialized_engine_data->data(), - serialized_engine_data->size()); - SaveTrtEngineSerializedDataToFile( - GetTrtEngineSerializedPath(Get("model_opt_cache_dir"), - engine_key), - trt_engine_serialized_data); - } else { + std::copy(params.begin(), params.end(), + std::back_inserter(*repetitive_params)); + bool need_serialize = (use_static_engine && !load_from_memory); + + if (need_serialize) { + trt_engine_serialized_data = GetTrtEngineSerializedData( + Get("model_opt_cache_dir"), engine_key); + // we can load the engine info serialized before from the disk. + if (!trt_engine_serialized_data.empty()) { + SetAttr(op_desc->Proto(), "engine_serialized_data", + trt_engine_serialized_data); LOG(INFO) << "Load TRT Optimized Info from " << GetTrtEngineSerializedPath( Get("model_opt_cache_dir"), engine_key); - } - - SetAttr(op_desc->Proto(), "engine_serialized_data", - trt_engine_serialized_data); - } -} - -std::vector ExtractParameters( - const std::unordered_set &nodes) { - // We can judge whether a variable is a parameter by - // its presistable property, but sometimes the presistable - // of the feed op output is true, so we have to identify it. - std::vector feed_outputs; - for (const auto &node : nodes) { - if (!node->IsOp()) continue; - std::string op_type = node->Op()->Type(); - if (op_type == "feed" || op_type == "fetch") { - std::vector output_names = node->Op()->OutputArgumentNames(); - std::copy(output_names.begin(), output_names.end(), - std::back_inserter(feed_outputs)); + return; } } - std::vector parameters; - for (const auto &node : nodes) { - if (!node->IsVar()) continue; - if (node->Var()->Persistable() && - std::find(feed_outputs.begin(), feed_outputs.end(), node->Name()) == - feed_outputs.end()) { - parameters.push_back(node->Name()); - } - } - return parameters; -} - -void RenameAndGetOutputs( - const std::vector &subgraph_nodes, - framework::BlockDesc *block_desc, - const std::set &input_names_with_id, - std::set *output_names_with_id, - std::set *output_names, - std::unordered_map *output_name_map) { - //// In the normal case, the paddle-trt exists bug when runing the googlenet. - // When there are more than two convolutions of 1 * 1 with the same input, the - // paddle-tensorrt will do the merging optimization, which fuse those conv - // into one conv, and then trigger bug. So, We should use strategy to avoid - // this optimization for the time being. This bug will be fixed in the future. - std::unordered_map - same_hierarchy_conv2d_num_map; - - for (size_t index = 0; index < block_desc->OpSize(); ++index) { - framework::proto::OpDesc *op = block_desc->Op(index)->Proto(); - framework::OpDesc op_desc(*op, nullptr); - auto correspond_node = subgraph_nodes[index]; - PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type()); - - std::unordered_map var2id; - std::unordered_map in_vars; - for (auto *in_var : correspond_node->inputs) { - var2id[in_var->Name()] = in_var->id(); - in_vars[in_var->Name()] = in_var; - } - // rename for the input variables of op inside subgraph - for (int i = 0; i < op->inputs_size(); i++) { - // one input - auto *in_var = op->mutable_inputs(i); - std::vector replaced_names; - for (int k = 0; k < in_var->arguments_size(); k++) { // all the arguments - std::string arg_value = in_var->arguments(k); - std::string arg_value_with_id = - arg_value + std::to_string(var2id[arg_value]); - if (input_names_with_id.count(arg_value_with_id)) { - replaced_names.push_back(arg_value); - } else { - replaced_names.push_back(arg_value_with_id); - } - } - in_var->clear_arguments(); - for (size_t k = 0; k < replaced_names.size(); k++) { - in_var->add_arguments(replaced_names[k]); - } - } - var2id.clear(); - for (auto out_var : correspond_node->outputs) { - var2id[out_var->Name()] = out_var->id(); - } - - if (op_desc.Type() == "conv2d") { - auto input_var_name = op_desc.Input("Input").front(); - auto filter_var_name = op_desc.Input("Filter").front(); - auto out_var_name = op_desc.Output("Output").front(); - auto filter_shape = in_vars[filter_var_name]->Var()->GetShape(); - const std::vector strides = - boost::get>(op_desc.GetAttr("strides")); - const std::vector paddings = - boost::get>(op_desc.GetAttr("paddings")); - if (same_hierarchy_conv2d_num_map[input_var_name] > 0) { - (*output_names_with_id) - .insert(out_var_name + std::to_string(var2id[out_var_name])); - (*output_names).insert(out_var_name); - } else if (filter_shape[2] == 1 && filter_shape[3] == 1 && - strides[0] == 1 && strides[1] == 1 && paddings[0] == 0 && - paddings[1] == 0) { - same_hierarchy_conv2d_num_map[input_var_name] += 1; - } - } - - // rename for the output variables of op inside subgraph - for (int i = 0; i < op->outputs_size(); i++) { - framework::proto::OpDesc_Var *out_var = op->mutable_outputs(i); - std::vector replaced_names; - for (int k = 0; k < out_var->arguments_size(); k++) { - std::string arg_value = out_var->arguments(k); - std::string arg_value_with_id = - arg_value + std::to_string(var2id[arg_value]); - if (output_names_with_id->count(arg_value_with_id)) { - (*output_name_map)[arg_value] = arg_value_with_id; - } - replaced_names.push_back(arg_value_with_id); - } - out_var->clear_arguments(); - for (size_t k = 0; k < replaced_names.size(); k++) { - out_var->add_arguments(replaced_names[k]); - } - } + // the following code will NOT run in following situation: + // 1. calibraion mode (generate trt int8 calibraiton table data) + // 2. already load serialized trt engine info. + LOG(INFO) << "Prepare TRT engine (Optimize model structure, Select OP " + "kernel etc). This process may cost a lot of time."; + std::unique_ptr trt_engine( + new tensorrt::TensorRTEngine( + Get("max_batch_size"), Get("workspace_size"), enable_int8, + calibrator.get(), Get("gpu_device_id"))); + auto *scope = param_scope(); + framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto()); + std::unordered_set param_set(params.begin(), params.end()); + inference::Singleton::Global() + .ConvertBlockToTRTEngine( + &block_desc_temp, *scope, + std::vector(input_names.begin(), input_names.end()), + param_set, output_mapping, trt_engine.get()); + nvinfer1::IHostMemory *serialized_engine_data = trt_engine->Serialize(); + trt_engine_serialized_data = + std::string((const char *)serialized_engine_data->data(), + serialized_engine_data->size()); + + if (need_serialize) { + SaveTrtEngineSerializedDataToFile( + GetTrtEngineSerializedPath(Get("model_opt_cache_dir"), + engine_key), + trt_engine_serialized_data); } + SetAttr(op_desc->Proto(), "engine_serialized_data", + trt_engine_serialized_data); } } // namespace analysis diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h index 6689a668fc9313df4105875477424f1426637226..f043670c5af39c1bdf8d4f00c7294fb53a4c9039 100644 --- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h @@ -20,6 +20,7 @@ #include #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/pass.h" +#include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index 85755fc471ae3d37ec5d005882668ccf0c35b354..90f09505c023c656a0b4ffaf9e3ef52152c0f0e7 100644 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -27,6 +27,10 @@ if(WITH_GPU AND TENSORRT_FOUND) set(inference_deps ${inference_deps} tensorrt_engine tensorrt_converter) endif() +if (ANAKIN_FOUND) + set(inference_deps ${inference_deps} anakin_op_converter anakin_engine) +endif() + add_subdirectory(details) cc_library(analysis_config SRCS analysis_config.cc DEPS lod_tensor paddle_pass_builder) diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 1be25de497346913f24eec147a2db58b0f7065f4..7bfdada49664544c829b1f4fc886292b29717c32 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -21,6 +21,7 @@ #include "paddle/fluid/platform/gpu_info.h" namespace paddle { +extern const std::vector kAnakinSubgraphPasses; PassStrategy *AnalysisConfig::pass_builder() const { if (!pass_builder_.get()) { @@ -108,6 +109,10 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { CP_MEMBER(use_mkldnn_); CP_MEMBER(mkldnn_enabled_op_types_); + CP_MEMBER(use_anakin_); + CP_MEMBER(anakin_max_batchsize_); + CP_MEMBER(anakin_max_input_shape_); + // Ir related. CP_MEMBER(enable_ir_optim_); CP_MEMBER(use_feed_fetch_ops_); @@ -230,6 +235,20 @@ void AnalysisConfig::Update() { } } + if (use_anakin_) { + PADDLE_ENFORCE(!use_tensorrt_, + "Anakin sub-graph and TensorRT sub-graph are not allowed to " + "run at the same time!"); + PADDLE_ENFORCE( + use_gpu_, + "Anakin sub-graph engine need gpu, please use the EnableGpu API."); + + pass_builder()->ClearPasses(); + for (const auto &pass : kAnakinSubgraphPasses) { + pass_builder()->AppendPass(pass); + } + } + if (ir_debug_) { pass_builder()->TurnOnDebug(); } @@ -266,7 +285,7 @@ std::string AnalysisConfig::SerializeInfoCache() { ss << specify_input_name_; ss << cpu_math_library_num_threads_; - + ss << use_anakin_; return ss.str(); } @@ -316,6 +335,11 @@ void AnalysisConfig::SetModelBuffer(const char *prog_buffer, Update(); } +void AnalysisConfig::SetEngineOptInfo( + std::map engine_opt_info) { + engine_opt_info_ = engine_opt_info; +} + NativeConfig AnalysisConfig::ToNativeConfig() const { NativeConfig config; config.model_dir = model_dir_; @@ -332,5 +356,12 @@ void AnalysisConfig::SwitchIrDebug(int x) { ir_debug_ = x; Update(); } - +void AnalysisConfig::EnableAnakinEngine( + int max_batch_size, + std::map> max_input_shape) { + anakin_max_batchsize_ = max_batch_size; + anakin_max_input_shape_ = max_input_shape; + use_anakin_ = true; + Update(); +} } // namespace paddle diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index b58c60e96a0bd6695b827e7063fa7a07f42fe586..001e8e66d5560f631dab8dd7c13bbaaef1e6195a 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -40,7 +40,10 @@ #if PADDLE_WITH_TENSORRT #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h" +#endif +#if PADDLE_WITH_ANAKIN +#include "paddle/fluid/inference/anakin/convert/op_converter.h" #endif DECLARE_bool(profile); @@ -349,7 +352,10 @@ void AnalysisPredictor::OptimizeInferenceProgram() { argument_.SetStaticMemoryOptimForceUpdate( config_.static_memory_optim_force_update_); argument_.SetModelFromMemory(config_.model_from_memory_); + argument_.SetEngineOptInfo(config_.engine_opt_info_); // Analyze inference_program + argument_.SetUseAnakin(config_.anakin_engine_enabled()); + argument_.SetPredictorID(predictor_id_); if (!config_.model_dir().empty()) { argument_.SetModelDir(config_.model_dir()); } else { @@ -373,6 +379,12 @@ void AnalysisPredictor::OptimizeInferenceProgram() { argument_.SetTensorRtUseStaticEngine(config_.trt_use_static_engine_); } + if (config_.use_gpu() && config_.anakin_engine_enabled()) { + argument_.SetAnakinMaxBatchSize(config_.anakin_max_batchsize_); + argument_.SetAnakinMaxInputShape(config_.anakin_max_input_shape_); + LOG(INFO) << "Anakin subgraph engine is enabled"; + } + if (config_.use_mkldnn_) { LOG(INFO) << "MKLDNN is enabled"; argument_.SetMKLDNNEnabledOpTypes(config_.mkldnn_enabled_op_types_); @@ -402,7 +414,7 @@ std::unique_ptr CreatePaddlePredictor< VLOG(3) << "create AnalysisConfig"; if (config.use_gpu()) { // 1. GPU memory - PADDLE_ENFORCE_GT(config.memory_pool_init_size_mb(), 0.f); + PADDLE_ENFORCE_GE(config.memory_pool_init_size_mb(), 0.f); PADDLE_ENFORCE_GE(config.gpu_device_id(), 0, "Invalid device id %d", config.gpu_device_id()); std::vector flags; @@ -805,3 +817,27 @@ USE_TRT_CONVERTER(prelu); USE_TRT_CONVERTER(conv2d_transpose); USE_TRT_CONVERTER(leaky_relu); #endif + +#if PADDLE_WITH_ANAKIN +USE_ANAKIN_CONVERTER(mul); +USE_ANAKIN_CONVERTER(fc); +USE_ANAKIN_CONVERTER(conv2d); +USE_ANAKIN_CONVERTER(conv2d_fusion); +USE_ANAKIN_CONVERTER(concat); +USE_ANAKIN_CONVERTER(split); +USE_ANAKIN_CONVERTER(relu); +USE_ANAKIN_CONVERTER(sigmoid); +USE_ANAKIN_CONVERTER(tanh); +USE_ANAKIN_CONVERTER(pool2d); +USE_ANAKIN_CONVERTER(elementwise_add); +USE_ANAKIN_CONVERTER(elementwise_mul); +USE_ANAKIN_CONVERTER(batch_norm); +USE_ANAKIN_CONVERTER(flatten); +USE_ANAKIN_CONVERTER(reshape); +USE_ANAKIN_CONVERTER(transpose); +USE_ANAKIN_CONVERTER(softmax); +USE_ANAKIN_CONVERTER(detection_out); +USE_ANAKIN_CONVERTER(density_prior_box); +USE_ANAKIN_CONVERTER(dropout); +USE_ANAKIN_CONVERTER(sum); +#endif diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index 5c0535d63e00c32ef82aa6d804459542d7da3e50..087bfbd002658da28e3097a10933d5be2312ab4f 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -45,7 +45,9 @@ using framework::NaiveExecutor; */ class AnalysisPredictor : public PaddlePredictor { public: - explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) {} + explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) { + predictor_id_ = inference::GetUniqueId(); + } ~AnalysisPredictor(); bool Init(const std::shared_ptr &parent_scope, @@ -152,6 +154,7 @@ class AnalysisPredictor : public PaddlePredictor { const size_t max_shape_collect_count_{1000}; int need_collect_var_shapes_{-1}; // -1 for default, 0 for false, 1 for true. std::vector>> batch_var_shapes_; + int predictor_id_; private: // Some status here that help to determine the status inside the predictor. diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc index 9a40cf4b60a64c3d0452a4367ccb7ac36de6b3b8..937b6398f8131a6cf4e8b0002e38f4513f0f884f 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc @@ -74,6 +74,21 @@ T *ZeroCopyTensor::data(PaddlePlace *place, int *size) const { return res; } +PaddleDType ZeroCopyTensor::type() const { + EAGER_GET_TENSOR; + auto type = tensor->type(); + if (type == framework::proto::VarType::FP32) { + return PaddleDType::FLOAT32; + } else if (type == framework::proto::VarType::INT64) { + return PaddleDType::INT64; + } else if (type == framework::proto::VarType::INT32) { + return PaddleDType::INT32; + } else { + LOG(ERROR) << "unknown type, only support float32 and int64 now."; + } + return PaddleDType::FLOAT32; +} + template void ZeroCopyTensor::copy_from_cpu(const T *data) { EAGER_GET_TENSOR; @@ -119,6 +134,7 @@ void ZeroCopyTensor::copy_to_cpu(T *data) { static_cast(pool.Get(gpu_place)); memory::Copy(platform::CPUPlace(), static_cast(data), gpu_place, t_data, ele_num * sizeof(T), dev_ctx->stream()); + cudaDeviceSynchronize(); #else PADDLE_THROW("Not compile with CUDA, should not reach here."); #endif diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index 9b05c335047d7f9a0c50004e4ff6817ddd53d80f..23df507aa609c018e54524159494b08b7e42c75c 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -14,9 +14,11 @@ #pragma once #include +#include #include #include #include +#include #include /*! \file */ @@ -136,10 +138,20 @@ struct AnalysisConfig { void EnableTensorRtEngine(int workspace_size = 1 << 20, int max_batch_size = 1, int min_subgraph_size = 3, Precision precision = Precision::kFloat32, - bool use_static = true); + bool use_static = false); /** A boolean state telling whether the TensorRT engine is used. */ bool tensorrt_engine_enabled() const { return use_tensorrt_; } + /** + * \brief Turn on the usage of Anakin sub-graph engine. + */ + void EnableAnakinEngine( + int max_batch_size = 1, + std::map> max_input_shape = {}); + + /** A boolean state indicating whether the Anakin sub-graph engine is used. + */ + bool anakin_engine_enabled() const { return use_anakin_; } /** \brief Control whether to debug IR graph analysis phase. * @@ -185,6 +197,7 @@ struct AnalysisConfig { /** A boolean state telling whether the model is set from the CPU memory. */ bool model_from_memory() const { return model_from_memory_; } + void SetEngineOptInfo(std::map engine_opt_info); /** Turn on memory optimize * NOTE still in development, will release latter. @@ -258,6 +271,10 @@ struct AnalysisConfig { std::string serialized_info_cache_; mutable std::unique_ptr pass_builder_; + bool use_anakin_{false}; + int anakin_max_batchsize_; + std::map> anakin_max_input_shape_; + std::map engine_opt_info_; }; } // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h index 703fd18069474f28b29c6f16c6308fc19bd3527f..87f40f09eb9bb552bd246cb39bbbd41abac1c9ac 100644 --- a/paddle/fluid/inference/api/paddle_api.h +++ b/paddle/fluid/inference/api/paddle_api.h @@ -177,6 +177,8 @@ class ZeroCopyTensor { device_ = device; } + PaddleDType type() const; + protected: explicit ZeroCopyTensor(void* scope) : scope_{scope} {} void SetName(const std::string& name) { name_ = name; } @@ -191,6 +193,7 @@ class ZeroCopyTensor { // performance. mutable void* tensor_{nullptr}; PaddlePlace place_; + PaddleDType dtype_; int device_; }; diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index d413a418c88241a15808474f753a3900e0a5293e..35dd1176718a3d7e4f3867ce048216ea45e5ba7f 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -68,10 +68,26 @@ void GpuPassStrategy::EnableMKLDNN() { LOG(ERROR) << "GPU not support MKLDNN yet"; } +// The following passes works for Anakin sub-graph engine. +const std::vector kAnakinSubgraphPasses({ + "infer_clean_graph_pass", // + "simplify_anakin_detection_pattern_pass5", // + "simplify_anakin_detection_pattern_pass4", // + "simplify_anakin_detection_pattern_pass3", // + "simplify_anakin_detection_pattern_pass2", // + "anakin_fillconstant_elementwisemul_fuse", // + "fc_fuse_pass", // + "conv_elementwise_add_fuse_pass", // + "conv_bn_fuse_pass", // + "conv_elementwise_add_fuse_pass", // + "fc_gru_fuse_pass", // + "anakin_subgraph_pass", +}); + GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { passes_.assign({ - "infer_clean_graph_pass", // - "identity_scale_op_clean_pass", // + "infer_clean_graph_pass", // + // "identity_scale_op_clean_pass", // "conv_affine_channel_fuse_pass", // "conv_eltwiseadd_affine_channel_fuse_pass", // "conv_bn_fuse_pass", // @@ -84,7 +100,7 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { #endif }); - for (int i = 6; i >= 3; i--) { + for (int i = 6; i >= 2; i--) { passes_.push_back("transpose_flatten" + std::to_string(i) + "_concat_fuse_pass"); } @@ -124,4 +140,5 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) { }); use_gpu_ = false; } +void PaddlePassBuilder::ClearPasses() { passes_.clear(); } } // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h index 84645fef018ce41ee2cba7ae25d2b0c13e49dfc0..65403e790e93b1a4f99803472d77b0e7a02a4fec 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.h +++ b/paddle/fluid/inference/api/paddle_pass_builder.h @@ -45,6 +45,7 @@ class PaddlePassBuilder { /** Delete all the passes that has type `pass_type`. */ void DeletePass(const std::string &pass_type); + void ClearPasses(); /** Append an analysis pass. */ void AppendAnalysisPass(const std::string &pass); @@ -157,4 +158,6 @@ class GpuPassStrategy : public PassStrategy { virtual ~GpuPassStrategy() = default; }; +extern const std::vector kAnakinSubgraphPasses; + } // namespace paddle diff --git a/paddle/fluid/inference/tensorrt/convert/io_converter.h b/paddle/fluid/inference/tensorrt/convert/io_converter.h index 71c48e085d25d2bc6720d93735f661f9e3af7b40..5daa242f6ab802a50fa6105f0102b817b700f461 100644 --- a/paddle/fluid/inference/tensorrt/convert/io_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/io_converter.h @@ -45,7 +45,7 @@ class EngineIOConverter { static void ConvertInput(const std::string& op_type, const LoDTensor& in, void* out, size_t max_size, cudaStream_t* stream) { PADDLE_ENFORCE(stream != nullptr); - auto* converter = Registry::Lookup( + auto* converter = Registry::Global().Lookup( op_type, "default" /* default_type */); PADDLE_ENFORCE_NOT_NULL(converter); converter->SetStream(stream); @@ -56,7 +56,7 @@ class EngineIOConverter { LoDTensor* out, size_t max_size, cudaStream_t* stream) { PADDLE_ENFORCE(stream != nullptr); - auto* converter = Registry::Lookup( + auto* converter = Registry::Global().Lookup( op_type, "default" /* default_type */); PADDLE_ENFORCE_NOT_NULL(converter); converter->SetStream(stream); @@ -69,12 +69,12 @@ class EngineIOConverter { cudaStream_t* stream_{nullptr}; }; -#define REGISTER_TENSORRT_IO_CONVERTER(op_type__, Converter__) \ - struct trt_io_##op_type__##_converter { \ - trt_io_##op_type__##_converter() { \ - Registry::Register(#op_type__); \ - } \ - }; \ +#define REGISTER_TENSORRT_IO_CONVERTER(op_type__, Converter__) \ + struct trt_io_##op_type__##_converter { \ + trt_io_##op_type__##_converter() { \ + Registry::Global().Register(#op_type__); \ + } \ + }; \ trt_io_##op_type__##_converter trt_io_##op_type__##_converter__; } // namespace tensorrt diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h index 90ed90b1e2907cc4be6f507890bae8df5a44ee38..55515569ead6e40c9b1b45fe31189dab7e2f2bb4 100644 --- a/paddle/fluid/inference/tensorrt/convert/op_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h @@ -86,7 +86,7 @@ class OpConverter { PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL); std::string Y = op_desc.Input("Y")[0]; if (parameters.count(Y)) { - it = Registry::Lookup("fc"); + it = Registry::Global().Lookup("fc"); } } if (op_desc.Type().find("elementwise") != std::string::npos) { @@ -103,28 +103,28 @@ class OpConverter { if (parameters.count(Y)) { PADDLE_ENFORCE(add_weight_op_set.count(op_type) > 0, "Unsupported elementwise type" + op_type); - it = - Registry::Lookup("elementwise_" + op_type + "_weight"); + it = Registry::Global().Lookup("elementwise_" + op_type + + "_weight"); PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", op_desc.Type()); } else { PADDLE_ENFORCE(add_tensor_op_set.count(op_type) > 0, "Unsupported elementwise type" + op_type); - it = - Registry::Lookup("elementwise_" + op_type + "_tensor"); + it = Registry::Global().Lookup("elementwise_" + op_type + + "_tensor"); } PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", op_desc.Type()); } if (op_desc.Type() == "depthwise_conv2d") { - it = Registry::Lookup("conv2d"); + it = Registry::Global().Lookup("conv2d"); PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", op_desc.Type()); } if (!it) { - it = Registry::Lookup(op_desc.Type()); + it = Registry::Global().Lookup(op_desc.Type()); } PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", op_desc.Type()); @@ -198,9 +198,9 @@ class OpConverter { #define REGISTER_TRT_OP_CONVERTER(op_type__, Converter__) \ struct trt_##op_type__##_converter : public ::paddle::framework::Registrar { \ trt_##op_type__##_converter() { \ - ::paddle::inference:: \ - Registry::Register< \ - ::paddle::inference::tensorrt::Converter__>(#op_type__); \ + ::paddle::inference::Registry< \ + paddle::inference::tensorrt::OpConverter>::Global() \ + .Register<::paddle::inference::tensorrt::Converter__>(#op_type__); \ } \ }; \ trt_##op_type__##_converter trt_##op_type__##_converter__; \ diff --git a/paddle/fluid/inference/utils/singleton.h b/paddle/fluid/inference/utils/singleton.h index cfb89e704457a11a3cd6e89dba5efad5acae0bce..990bef359499834c3a7cb025c3fb1d94ceea958e 100644 --- a/paddle/fluid/inference/utils/singleton.h +++ b/paddle/fluid/inference/utils/singleton.h @@ -45,13 +45,13 @@ struct Registry { } template - static void Register(const std::string& name) { + void Register(const std::string& name) { PADDLE_ENFORCE_EQ(items_.count(name), 0); items_[name] = new ItemChild; } - static ItemParent* Lookup(const std::string& name, - const std::string& default_name = "") { + ItemParent* Lookup(const std::string& name, + const std::string& default_name = "") { auto it = items_.find(name); if (it == items_.end()) { if (default_name == "") @@ -70,11 +70,8 @@ struct Registry { private: Registry() = default; - static std::unordered_map items_; + std::unordered_map items_; }; -template -std::unordered_map Registry::items_; - } // namespace inference } // namespace paddle diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 651c5e6e75834c27313abd79a33bedb62ecd2632..afac8e4d2a39cae37e33b02135bc2c41b5ceb0c3 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -34,6 +34,10 @@ if (WITH_GPU AND TENSORRT_FOUND) add_subdirectory(tensorrt) endif() +if (ANAKIN_FOUND) + add_subdirectory(anakin) +endif() + SET(OP_HEADER_DEPS xxhash) if (WITH_GPU) SET(OP_HEADER_DEPS ${OP_HEADER_DEPS} cub) diff --git a/paddle/fluid/operators/anakin/CMakeLists.txt b/paddle/fluid/operators/anakin/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5eacefc645bab288da7c289a5d7701abbcbef03d --- /dev/null +++ b/paddle/fluid/operators/anakin/CMakeLists.txt @@ -0,0 +1,2 @@ +op_library(anakin_engine_op DEPS anakin_engine anakin_op_converter) +# file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(anakin_engine);\n") diff --git a/paddle/fluid/operators/anakin/anakin_engine_op.cc b/paddle/fluid/operators/anakin/anakin_engine_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..58db16ea0c1347a366a4d5927e414d76864cb6ab --- /dev/null +++ b/paddle/fluid/operators/anakin/anakin_engine_op.cc @@ -0,0 +1,53 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PADDLE_WITH_CUDA + +#include +#include + +#include "paddle/fluid/operators/anakin/anakin_engine_op.h" + +namespace paddle { + +namespace operators { + +class AnakinEngineOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("Xs", "A list of inputs.").AsDuplicable(); + AddOutput("Ys", "A list of outputs").AsDuplicable(); + AddAttr("subgraph", "the subgraph."); + AddAttr( + "engine_key", + "The engine_key here is used to distinguish different TRT Engines"); + AddAttr("sub_block", "the trt block"); + AddComment("Anakin engine operator."); + } +}; + +class AnakinEngineInferVarType : public framework::VarTypeInference { + public: + void operator()(framework::InferVarTypeContext *ctx) const override {} +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR(anakin_engine, ops::AnakinEngineOp, ops::AnakinEngineOpMaker, + ops::AnakinEngineOpMaker); + +#endif // PADDLE_WITH_CUDA diff --git a/paddle/fluid/operators/anakin/anakin_engine_op.h b/paddle/fluid/operators/anakin/anakin_engine_op.h new file mode 100644 index 0000000000000000000000000000000000000000..9d5b4f6f54ccfc9802cef6abac428e28a72ac293 --- /dev/null +++ b/paddle/fluid/operators/anakin/anakin_engine_op.h @@ -0,0 +1,162 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#ifdef PADDLE_WITH_CUDA + +#include +#include +#include +#include +#include +#include +#include + +#include "paddle/fluid/framework/executor.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/inference/anakin/convert/op_converter.h" +#include "paddle/fluid/inference/anakin/engine.h" +#include "paddle/fluid/inference/analysis/helper.h" + +namespace paddle { +namespace operators { + +using FluidDT = framework::proto::VarType_Type; +using inference::Singleton; + +using anakin::graph::GraphGlobalMem; +using anakin::AK_FLOAT; +using anakin::Precision; +using anakin::saber::NV; +using anakin::saber::X86; +using anakin::saber::Shape; +using anakin::PBlock; +using anakin::PTuple; +using inference::anakin::AnakinEngine; + +class AnakinEngineOp : public framework::OperatorBase { + using AnakinNvEngineT = AnakinEngine; + + private: + std::vector input_names_; + std::unordered_set param_names_; + mutable AnakinNvEngineT *anakin_engine_; + std::string engine_key_; + std::string engine_serialized_data_; + + public: + AnakinEngineOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : framework::OperatorBase(type, inputs, outputs, attrs) { + input_names_ = Inputs("Xs"); + engine_key_ = Attr("engine_key"); + auto params = Attr>("parameters"); + for (const auto ¶m : params) { + param_names_.insert(param); + } + anakin_engine_ = nullptr; + } + + protected: + void RunImpl(const framework::Scope &scope, + const platform::Place &dev_place) const override { + RunAnakin(scope, dev_place); + } + + void RunAnakin(const framework::Scope &scope, + const platform::Place &dev_place) const { + auto *engine = GetEngine(scope, dev_place); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(dev_place); + auto stream = + reinterpret_cast(dev_ctx).stream(); + + PADDLE_ENFORCE(!input_names_.empty(), "should pass more than one inputs"); + + std::vector output_maps = + Attr>("output_name_mapping"); + + std::map inputs; + // Convert input tensor from fluid to engine. + for (const auto &x : Inputs("Xs")) { + if (param_names_.count(x)) continue; + auto &t = + inference::analysis::GetFromScope(scope, x); + + inputs.insert({x, &t}); + } + + std::map outputs; + int output_index = 0; + for (const auto &y : Outputs("Ys")) { + auto *fluid_v = scope.FindVar(y); + PADDLE_ENFORCE_NOT_NULL(fluid_v, "no output variable called %s", y); + auto *fluid_t = fluid_v->GetMutable(); + outputs.insert({output_maps[output_index], fluid_t}); + output_index += 1; + } + engine->Execute(inputs, outputs, stream); + } + + AnakinNvEngineT *GetEngine(const framework::Scope &scope, + const platform::Place &dev_place) const { + if (anakin_engine_ == nullptr) { + anakin_engine_ = + inference::Singleton::Global() + .Get(engine_key_); + } + + return anakin_engine_; + } + + void Prepare(const framework::Scope &scope, const platform::Place &dev_place, + AnakinNvEngineT *engine) const { + LOG(INFO) << "Prepare Anakin engine (Optimize model structure, Select OP " + "kernel etc). This process may cost a lot of time."; + framework::proto::BlockDesc block_desc; + block_desc.ParseFromString(Attr("subgraph")); + + std::vector output_maps = + Attr>("output_name_mapping"); + + inference::Singleton::Global() + .ConvertBlock(block_desc, param_names_, scope, engine); + engine->Freeze(); + for (const auto &x : Inputs("Xs")) { + if (param_names_.count(x)) continue; + auto &t = + inference::analysis::GetFromScope(scope, x); + auto t_shape = framework::vectorize2int(t.dims()); + // all input shape should be 4 dims + if (t_shape.size() == 2) { + t_shape.push_back(1); + t_shape.push_back(1); + } + engine->SetInputShape(x, t_shape); + } + + engine->Optimize(); + + engine->InitGraph(); + } +}; + +} // namespace operators +} // namespace paddle + +#endif // PADDLE_WITH_CUDA diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index d54a3e8670e892f4e0d9ebb60ab26714ac8c0c68..48002a7620221aad90926ccd3eb89319d6e516b4 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -212,6 +212,7 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface { CudnnHolder::CudnnHolder(const cudaStream_t* stream, const CUDAPlace& place) : workspace_(nullptr), stream_(stream), place_(place) { + PADDLE_ENFORCE(cudaSetDevice(place_.device)); PADDLE_ENFORCE(dynload::cudnnCreate(&cudnn_handle_)); PADDLE_ENFORCE(dynload::cudnnSetStream(cudnn_handle_, *stream_)); } @@ -252,10 +253,6 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place) #endif } - if (dynload::HasCUDNN()) { - cudnn_holder_.reset(new CudnnHolder(&stream_, place)); - } - driver_version_ = GetCUDADriverVersion(place_.device); runtime_version_ = GetCUDARuntimeVersion(place_.device); @@ -348,12 +345,21 @@ bool CUDADeviceContext::tensor_core_available() const { return cublas_tensor_core_handle_ != nullptr; } +CudnnHolder* CUDADeviceContext::cudnn_holder() const { + std::call_once(init_cudnn_, [&]() { + if (dynload::HasCUDNN()) { + cudnn_holder_.reset(new CudnnHolder(&stream_, place_)); + } + }); + return cudnn_holder_.get(); +} + cudnnHandle_t CUDADeviceContext::cudnn_handle() const { - return cudnn_holder_->cudnn_handle(); + return cudnn_holder()->cudnn_handle(); } CudnnWorkspaceHandle CUDADeviceContext::cudnn_workspace_handle() const { - return CudnnWorkspaceHandle(cudnn_holder_.get()); + return CudnnWorkspaceHandle(cudnn_holder()); } cudaStream_t CUDADeviceContext::stream() const { return stream_; } diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 9dbc72f561b04b3005e2ef029e0c4ea6c2c312b1..778f6613bd49dfbc46e8888cd53b1a4de5fe923d 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -292,9 +292,11 @@ class CUDADeviceContext : public DeviceContext { private: CUDAPlace place_; + mutable std::once_flag init_cudnn_; + std::unique_ptr eigen_device_; std::unique_ptr eigen_stream_; - std::unique_ptr cudnn_holder_; + mutable std::unique_ptr cudnn_holder_; cudaStream_t stream_; std::unique_ptr cublas_handle_; @@ -317,6 +319,7 @@ class CUDADeviceContext : public DeviceContext { // StreamCallbackManager is thread-safe std::unique_ptr callback_manager_; + CudnnHolder* cudnn_holder() const; DISABLE_COPY_AND_ASSIGN(CUDADeviceContext); };