Unverified commit e1b0d7cb, authored by 石晓伟, committed by GitHub

remove anakin from code, test=develop (#22420)

Parent 0a678ca0
...@@ -151,7 +151,6 @@ if(NOT WIN32)
include(cupti)
endif()
include(anakin_subgraph)
include(flags) # set paddle compile flags
include(cudnn) # set cudnn libraries, must before configure
...
set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT")
find_path(ANAKIN_INCLUDE_DIR anakin_config.h
PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/include
$ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/include
NO_DEFAULT_PATH
)
find_library(ANAKIN_LIBRARY NAMES libanakin_saber_common.so libanakin.so
PATHS ${ANAKIN_ROOT}
$ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/lib
NO_DEFAULT_PATH
DOC "Path to ANAKIN library.")
if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY)
set(ANAKIN_FOUND ON)
else()
set(ANAKIN_FOUND OFF)
endif()
if(ANAKIN_FOUND)
message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")
include_directories(${ANAKIN_ROOT})
include_directories(${ANAKIN_ROOT}/include)
include_directories(${ANAKIN_ROOT}/saber)
link_directories(${ANAKIN_ROOT})
add_definitions(-DPADDLE_WITH_ANAKIN)
endif()
if(ANAKIN_FOUND)
if (ANAKIN_MLU AND NOT WITH_GPU AND NOT ANAKIN_X86)
message(STATUS "Compile with anakin mlu place.")
add_definitions(-DANAKIN_MLU_PLACE)
elseif(ANAKIN_BM AND NOT WITH_GPU AND NOT ANAKIN_X86)
message(STATUS "Compile with anakin bm place.")
add_definitions(-DANAKIN_BM_PLACE)
elseif(ANAKIN_X86)
message(STATUS "Compile with anakin x86 place.")
add_definitions(-DANAKIN_X86_PLACE)
endif()
endif()
if(ANAKIN_FOUND AND WITH_GPU AND WITH_DSO)
message(STATUS "Compile with anakin subgraph.")
set(ANAKIN_SUBGRAPH ON)
endif()
...@@ -109,16 +109,6 @@ if(WITH_GPU)
endif()
include_directories(${TENSORRT_INCLUDE_DIR})
endif()
if(ANAKIN_FOUND)
if(${CUDA_VERSION_MAJOR} VERSION_LESS 8)
message(WARNING "Anakin needs CUDA >= 8.0 to compile. Force ANAKIN_FOUND = OFF")
set(ANAKIN_FOUND OFF CACHE STRING "Anakin is valid only when CUDA >= 8.0." FORCE)
endif()
if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
message(WARNING "Anakin needs CUDNN >= 7.0 to compile. Force ANAKIN_FOUND = OFF")
set(ANAKIN_FOUND OFF CACHE STRING "Anakin is valid only when CUDNN >= 7.0." FORCE)
endif()
endif()
elseif(WITH_AMD_GPU)
add_definitions(-DPADDLE_WITH_HIP)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__")
...
...@@ -137,13 +137,6 @@ function(copy_part_of_thrid_party TARGET DST)
SRCS ${LITE_BINARY_DIR}/inference_lite_lib/*
DSTS ${dst_dir})
endif()
if (ANAKIN_FOUND)
set(dst_dir "${DST}/third_party/install/anakin")
copy(${TARGET}
SRCS ${ANAKIN_ROOT}/*
DSTS ${dst_dir})
endif ()
endfunction()
# inference library for only inference
...
...@@ -71,7 +71,6 @@ pass_library(identity_scale_op_clean_pass base)
pass_library(sync_batch_norm_pass base)
pass_library(runtime_context_cache_pass base)
pass_library(quant_conv2d_dequant_fuse_pass inference)
pass_library(fillconstant_elementwisemul_fuse inference)
pass_library(shuffle_channel_detect_pass inference)
pass_library(delete_quant_dequant_op_pass inference)
pass_library(simplify_with_basic_ops_pass base)
...@@ -81,10 +80,6 @@ if(WITH_GPU)
pass_library(cudnn_placement_pass base DEPS placement_pass_base)
endif()
if(ANAKIN_SUBGRAPH)
pass_library(simplify_anakin_priorbox_detection_out_pass inference)
endif()
if(WITH_MKLDNN)
pass_library(mkldnn_placement_pass base DEPS placement_pass_base DIR mkldnn)
pass_library(depthwise_conv_mkldnn_pass base DIR mkldnn)
...
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include "paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
namespace paddle {
namespace framework {
namespace ir {
#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern);
#define GET_NODES \
GET_IR_NODE(fill_constant); \
GET_IR_NODE(fill_constant_out); \
GET_IR_NODE(elementwise_mul); \
GET_IR_NODE(elementwise_mul_out);
void FillconstantElementwisemulFuse::ApplyImpl(ir::Graph* graph) const {
const std::string pattern_name = "fillconstant_elementwisemul_fuse";
FusePassBase::Init(pattern_name, graph);
GraphPatternDetector gpd;
auto* x = gpd.mutable_pattern()
->NewNode("x")
->assert_is_op_input("elementwise_mul", "X")
->AsInput();
patterns::FillConstantElementWiseMulFuse pattern(gpd.mutable_pattern(),
pattern_name);
pattern(x);
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_NODES;
PADDLE_ENFORCE(subgraph.count(x));
auto* elementwise_in = subgraph.at(x);
float constant_value =
boost::get<float>(fill_constant->Op()->GetAttr("value"));
framework::OpDesc new_op_desc;
new_op_desc.SetType("scale");
new_op_desc.SetInput("X", {elementwise_in->Name()});
new_op_desc.SetAttr("scale", constant_value);
new_op_desc.SetAttr("bias", static_cast<float>(0.0));
new_op_desc.SetAttr("bias_after_scale", true);
new_op_desc.SetOutput("Out", {elementwise_mul_out->Name()});
new_op_desc.Flush();
// Create a new node for the fused op.
auto* scale_op = graph->CreateOpNode(&new_op_desc);
IR_NODE_LINK_TO(elementwise_in, scale_op); // Input
IR_NODE_LINK_TO(scale_op, elementwise_mul_out); // Output
// Delete the unneeded nodes.
GraphSafeRemoveNodes(graph,
{fill_constant, fill_constant_out, elementwise_mul});
};
gpd(graph, handler);
}
} // namespace ir
} // namespace framework
} // namespace paddle
REGISTER_PASS(fillconstant_elementwisemul_fuse,
paddle::framework::ir::FillconstantElementwisemulFuse);
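The pass removed above rewrote elementwise_mul(X, fill_constant(value)) into a single scale op with scale = value and bias = 0. A minimal standalone sketch of that identity (plain C++, independent of the Paddle IR types):

#include <cassert>
#include <cmath>
#include <vector>

int main() {
  const std::vector<float> x = {1.f, -2.f, 3.5f};
  const float constant_value = 0.25f;  // the "value" attribute read from fill_constant

  for (float v : x) {
    const float mul_out = v * constant_value;          // elementwise_mul(X, fill_constant_out)
    const float scale_out = v * constant_value + 0.f;  // scale(X) with scale=value, bias=0
    assert(std::fabs(mul_out - scale_out) < 1e-6f);
  }
  return 0;
}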
...@@ -1878,173 +1878,6 @@ PDNode *patterns::TransposeFlattenConcat::operator()(
return concat_out;
}
PDNode *patterns::AnakinDetectionPattern::operator()(
std::vector<PDNode *> conv_in, int times, std::string priorbox_type,
bool is_reshape) {
// The times represents the repeat times of the
// {prior_box, prior_box_loc_out, flatten, prior_box_var_out, reshape}
const int kNumFields = 7;
const int kPriorBoxLocOffset = 1;
const int kReshape1Offset = 2;
const int kReshape1OutOffset = 3;
const int kPriorBoxVarOffset = 4;
const int kReshape2Offset = 5;
const int kReshape2OutOffset = 6;
const int kBoxCoderThirdInputOffset = times;
const int kMultiClassSecondInputNmsOffset = times + 1;
std::vector<PDNode *> nodes;
std::string op_after_priorbox = is_reshape ? "reshape2" : "flatten2";
for (int i = 0; i < times; i++) {
nodes.push_back(
pattern->NewNode(GetNodeName("prior_box" + std::to_string(i)))
->assert_is_op(priorbox_type));
nodes.push_back(pattern->NewNode(GetNodeName("box_out" + std::to_string(i)))
->assert_is_op_output(priorbox_type, "Boxes")
->assert_is_op_input(op_after_priorbox, "X")
->AsIntermediate());
nodes.push_back(
pattern->NewNode(GetNodeName("reshape1" + std::to_string(i)))
->assert_is_op(op_after_priorbox));
nodes.push_back(
pattern->NewNode(GetNodeName("reshape1_out" + std::to_string(i)))
->assert_is_op_output(op_after_priorbox)
->assert_is_op_nth_input("concat", "X", i)
->AsIntermediate());
nodes.push_back(
pattern->NewNode(GetNodeName("box_var_out" + std::to_string(i)))
->assert_is_op_output(priorbox_type, "Variances")
->assert_is_op_input(op_after_priorbox, "X")
->AsIntermediate());
nodes.push_back(
pattern->NewNode(GetNodeName("reshape2" + std::to_string(i)))
->assert_is_op(op_after_priorbox));
nodes.push_back(
pattern->NewNode(GetNodeName("reshape2_out" + std::to_string(i)))
->assert_is_op_output(op_after_priorbox)
->assert_is_op_nth_input("concat", "X", i)
->AsIntermediate());
}
auto concat_op1 = pattern->NewNode(GetNodeName("concat1"))
->assert_is_op("concat")
->assert_op_has_n_inputs("concat", times);
auto concat_out1 = pattern->NewNode(GetNodeName("concat1_out"))
->assert_is_op_output("concat")
->AsIntermediate();
auto concat_op2 = pattern->NewNode(GetNodeName("concat2"))
->assert_is_op("concat")
->assert_op_has_n_inputs("concat", times);
auto concat_out2 = pattern->NewNode(GetNodeName("concat2_out"))
->assert_is_op_output("concat")
->AsIntermediate();
auto box_coder_op = pattern->NewNode(GetNodeName("box_coder"))
->assert_is_op("box_coder")
->assert_op_has_n_inputs("box_coder", 3);
auto box_coder_out = pattern->NewNode(GetNodeName("box_coder_out"))
->assert_is_op_output("box_coder")
->AsIntermediate();
auto transpose_before_nms =
pattern->NewNode(GetNodeName("transpose_before_nms"))
->assert_is_op("transpose2");
auto transpose_before_nms_out =
pattern->NewNode(GetNodeName("transpose_before_nms_out"))
->assert_is_op_output("transpose2")
->assert_is_op_input("multiclass_nms", "Scores")
->AsIntermediate();
auto multiclass_nms_op = pattern->NewNode(GetNodeName("multiclass_nms"))
->assert_is_op("multiclass_nms")
->assert_op_has_n_inputs("multiclass_nms", 2);
auto multiclass_nms_out = pattern->NewNode(GetNodeName("multiclass_nms_out"))
->assert_is_op_output("multiclass_nms")
->AsOutput();
std::vector<PDNode *> reshape1_outs;
std::vector<PDNode *> reshape2_outs;
for (int i = 0; i < times; i++) {
conv_in[i]->AsInput();
// prior_box
nodes[i * kNumFields]->LinksFrom({conv_in[i]});
// prior_box box out
nodes[i * kNumFields + kPriorBoxLocOffset]->LinksFrom(
{nodes[i * kNumFields]});
// reshape
nodes[i * kNumFields + kReshape1Offset]->LinksFrom(
{nodes[i * kNumFields + kPriorBoxLocOffset]});
// reshape_out
nodes[i * kNumFields + kReshape1OutOffset]->LinksFrom(
{nodes[i * kNumFields + kReshape1Offset]});
nodes[i * kNumFields + kPriorBoxVarOffset]->LinksFrom(
{nodes[i * kNumFields]});
// reshape
nodes[i * kNumFields + kReshape2Offset]->LinksFrom(
{nodes[i * kNumFields + kPriorBoxVarOffset]});
// reshape_out
nodes[i * kNumFields + kReshape2OutOffset]->LinksFrom(
{nodes[i * kNumFields + kReshape2Offset]});
reshape1_outs.push_back(nodes[i * kNumFields + kReshape1OutOffset]);
reshape2_outs.push_back(nodes[i * kNumFields + kReshape2OutOffset]);
}
concat_op1->LinksFrom(reshape1_outs);
concat_op2->LinksFrom(reshape2_outs);
concat_out1->LinksFrom({concat_op1});
concat_out2->LinksFrom({concat_op2});
conv_in[kBoxCoderThirdInputOffset]->AsInput();
conv_in[kMultiClassSecondInputNmsOffset]->AsInput();
box_coder_op->LinksFrom(
{concat_out1, concat_out2, conv_in[kBoxCoderThirdInputOffset]});
box_coder_out->LinksFrom({box_coder_op});
transpose_before_nms->LinksFrom({conv_in[kMultiClassSecondInputNmsOffset]});
transpose_before_nms_out->LinksFrom({transpose_before_nms});
multiclass_nms_op->LinksFrom({box_coder_out, transpose_before_nms_out})
.LinksTo({multiclass_nms_out});
return multiclass_nms_out;
}
PDNode *patterns::FillConstantElementWiseMulFuse::operator()(
PDNode *elementwise_op_input) {
auto fill_constant =
pattern->NewNode(fill_constant_repr())->assert_is_op("fill_constant");
auto fill_constant_out = pattern->NewNode(fill_constant_out_repr())
->assert_is_op_output("fill_constant")
->assert_is_op_input("elementwise_mul", "Y")
->AsIntermediate();
auto elementwise_mul_op =
pattern->NewNode(elementwise_mul_repr())->assert_is_op("elementwise_mul");
auto elementwise_mul_out = pattern->NewNode(elementwise_mul_out_repr())
->assert_is_op_output("elementwise_mul")
->AsOutput();
fill_constant_out->LinksFrom({fill_constant});
elementwise_mul_op->LinksFrom({elementwise_op_input, fill_constant_out});
elementwise_mul_out->LinksFrom({elementwise_mul_op});
return elementwise_mul_out;
}
void patterns::QuantDequantOpFuse::operator()(PDNode *quant_op_input,
const std::string &op_type,
const std::string &weight_name,
...
...@@ -1093,37 +1093,6 @@ struct TransposeFlattenConcat : public PatternBase {
}
};
struct AnakinDetectionPattern : public PatternBase {
AnakinDetectionPattern(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "anakin_detect_pattern") {}
PDNode* operator()(std::vector<PDNode*> conv_inputs, int times,
std::string priorbox_type, bool is_reshape);
std::string GetNodeName(const std::string& op_type) {
return PDNodeName(name_scope_, repr_, id_, op_type);
}
PDNode* GetPDNode(const std::string& op_type) {
return pattern->RetrieveNode(GetNodeName(op_type));
}
};
struct FillConstantElementWiseMulFuse : public PatternBase {
FillConstantElementWiseMulFuse(PDPattern* pattern,
const std::string& name_scope)
: PatternBase(pattern, name_scope,
"anakin_fillconstant_elementwisemul_fuse") {}
PDNode* operator()(PDNode* elementwise_op_input);
// declare operator node's name
PATTERN_DECL_NODE(fill_constant);
PATTERN_DECL_NODE(fill_constant_out);
PATTERN_DECL_NODE(elementwise_mul);
PATTERN_DECL_NODE(elementwise_mul_out);
};
struct QuantDequantOpFuse : public PatternBase {
QuantDequantOpFuse(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "quant_dequant_fuse") {}
...
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/ir/simplify_anakin_priorbox_detection_out_pass.h"
namespace paddle {
namespace framework {
namespace ir {
void RunSimplifyAnakinDetection(ir::Graph *graph, int times, bool is_density,
bool is_reshape) {
const std::string pattern_name =
"simplify_anakin_detection_pattern_pass" + std::to_string(times);
std::string priorbox_type = is_density ? "density_prior_box" : "prior_box";
GraphPatternDetector gpd;
std::vector<PDNode *> input_nodes;
for (int i = 0; i < times; i++) {
input_nodes.push_back(gpd.mutable_pattern()
->NewNode("x" + std::to_string(i))
->assert_is_op_input(priorbox_type, "Input")
->AsInput());
}
input_nodes.push_back(gpd.mutable_pattern()
->NewNode("x" + std::to_string(times))
->assert_is_op_input("box_coder", "TargetBox")
->AsInput());
input_nodes.push_back(gpd.mutable_pattern()
->NewNode("x" + std::to_string(times + 1))
->assert_is_op_input("transpose2")
->AsInput());
patterns::AnakinDetectionPattern pattern(gpd.mutable_pattern(), pattern_name);
pattern(input_nodes, times, priorbox_type, is_reshape);
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) {
const int kNumFields = 7;
const int kPriorBoxLocOffset = 1;
const int kReshape1Offset = 2;
const int kReshape1OutOffset = 3;
const int kPriorBoxVarOffset = 4;
const int kReshape2Offset = 5;
const int kReshape2OutOffset = 6;
std::vector<Node *> nodes;
for (int i = 0; i < times; i++) {
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("prior_box" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("box_out" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("reshape1" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("reshape1_out" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("reshape2" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("reshape2_out" + std::to_string(i))));
PADDLE_ENFORCE(
subgraph.at(pattern.GetPDNode("box_var_out" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("prior_box" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("box_out" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("reshape1" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("reshape1_out" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("box_var_out" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("reshape2" + std::to_string(i))));
nodes.push_back(
subgraph.at(pattern.GetPDNode("reshape2_out" + std::to_string(i))));
}
Node *concat_op1 = subgraph.at(pattern.GetPDNode("concat1"));
Node *concat_out1 = subgraph.at(pattern.GetPDNode("concat1_out"));
Node *concat_op2 = subgraph.at(pattern.GetPDNode("concat2"));
Node *concat_out2 = subgraph.at(pattern.GetPDNode("concat2_out"));
Node *box_coder_third_input = subgraph.at(input_nodes[times]);
Node *box_coder_op = subgraph.at(pattern.GetPDNode("box_coder"));
Node *box_coder_out = subgraph.at(pattern.GetPDNode("box_coder_out"));
Node *multiclass_nms_second_input = subgraph.at(input_nodes[times + 1]);
Node *transpose_before_nms =
subgraph.at(pattern.GetPDNode("transpose_before_nms"));
Node *transpose_before_nms_out =
subgraph.at(pattern.GetPDNode("transpose_before_nms_out"));
Node *multiclass_nms = subgraph.at(pattern.GetPDNode("multiclass_nms"));
Node *multiclass_nms_out =
subgraph.at(pattern.GetPDNode("multiclass_nms_out"));
std::string code_type =
boost::get<std::string>(box_coder_op->Op()->GetAttr("code_type"));
bool box_normalized =
boost::get<bool>(box_coder_op->Op()->GetAttr("box_normalized"));
int background_label =
boost::get<int>(multiclass_nms->Op()->GetAttr("background_label"));
float score_threshold =
boost::get<float>(multiclass_nms->Op()->GetAttr("score_threshold"));
int nms_top_k = boost::get<int>(multiclass_nms->Op()->GetAttr("nms_top_k"));
float nms_threshold =
boost::get<float>(multiclass_nms->Op()->GetAttr("nms_threshold"));
float nms_eta = boost::get<float>(multiclass_nms->Op()->GetAttr("nms_eta"));
int keep_top_k =
boost::get<int>(multiclass_nms->Op()->GetAttr("keep_top_k"));
std::vector<std::string> concat1_input_names;
for (int i = 0; i < times; i++) {
concat1_input_names.push_back(
nodes[i * kNumFields + kPriorBoxLocOffset]->Name());
}
framework::OpDesc concat1_desc;
concat1_desc.SetType("concat");
concat1_desc.SetInput("X", concat1_input_names);
concat1_desc.SetAttr("axis", 2);
concat1_desc.SetOutput("Out", {concat_out1->Name()});
auto *new_add_concat_op = graph->CreateOpNode(&concat1_desc);
for (int i = 0; i < times; i++) {
nodes[i * kNumFields + kPriorBoxLocOffset]->outputs.push_back(
new_add_concat_op);
new_add_concat_op->inputs.push_back(
nodes[i * kNumFields + kPriorBoxLocOffset]);
}
framework::OpDesc new_op_desc;
new_op_desc.SetType("detection_out");
new_op_desc.SetInput("PriorBox", {concat_out1->Name()});
new_op_desc.SetInput("TargetBox", {box_coder_third_input->Name()});
new_op_desc.SetInput("Scores", {multiclass_nms_second_input->Name()});
new_op_desc.SetAttr("code_type", code_type);
new_op_desc.SetAttr("box_normalized", box_normalized);
new_op_desc.SetAttr("background_label", background_label);
new_op_desc.SetAttr("score_threshold", score_threshold);
new_op_desc.SetAttr("nms_top_k", nms_top_k);
new_op_desc.SetAttr("nms_threshold", nms_threshold);
new_op_desc.SetAttr("nms_eta", nms_eta);
new_op_desc.SetAttr("keep_top_k", keep_top_k);
new_op_desc.SetOutput("Out", {multiclass_nms_out->Name()});
new_op_desc.Flush();
// Create a new node for the fused op.
auto *detection_out_op = graph->CreateOpNode(&new_op_desc);
std::unordered_set<const Node *> delete_nodes;
for (int i = 0; i < times; i++) {
nodes[i * kNumFields + kPriorBoxLocOffset]->outputs.push_back(concat_op1);
delete_nodes.insert(nodes[i * kNumFields + kReshape1Offset]);
delete_nodes.insert(nodes[i * kNumFields + kReshape1OutOffset]);
delete_nodes.insert(nodes[i * kNumFields + kPriorBoxVarOffset]);
delete_nodes.insert(nodes[i * kNumFields + kReshape2Offset]);
delete_nodes.insert(nodes[i * kNumFields + kReshape2OutOffset]);
}
delete_nodes.insert(concat_op1);
delete_nodes.insert(concat_op2);
delete_nodes.insert(concat_out2);
delete_nodes.insert(box_coder_op);
delete_nodes.insert(box_coder_out);
delete_nodes.insert(transpose_before_nms);
delete_nodes.insert(transpose_before_nms_out);
delete_nodes.insert(multiclass_nms);
new_add_concat_op->outputs.push_back(concat_out1);
concat_out1->inputs.push_back(new_add_concat_op);
detection_out_op->inputs.push_back(concat_out1);
detection_out_op->inputs.push_back(box_coder_third_input);
detection_out_op->inputs.push_back(multiclass_nms_second_input);
detection_out_op->outputs.push_back(multiclass_nms_out);
concat_out1->outputs.push_back(detection_out_op);
box_coder_third_input->outputs.push_back(detection_out_op);
multiclass_nms_second_input->outputs.push_back(detection_out_op);
multiclass_nms_out->inputs.push_back(detection_out_op);
// Delete the unneeded nodes.
GraphSafeRemoveNodes(graph, delete_nodes);
};
gpd(graph, handler);
}
void SimplifyAnakinDetectionPatternPass::ApplyImpl(ir::Graph *graph) const {
const int pattern_nums = 6;
const std::string pattern_name = "simplify_anakin_detection_pattern_pass";
FusePassBase::Init(pattern_name, graph);
std::vector<bool> options = {true, false};
for (const auto &is_density : options) {
for (const auto &is_reshape : options) {
for (int i = 1; i <= pattern_nums; i++) {
RunSimplifyAnakinDetection(graph, i, is_density, is_reshape);
}
}
}
}
} // namespace ir
} // namespace framework
} // namespace paddle
typedef paddle::framework::ir::SimplifyAnakinDetectionPatternPass
priorbox_pattern;
REGISTER_PASS(simplify_anakin_priorbox_detection_out_pass, priorbox_pattern);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <unordered_set>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
namespace paddle {
namespace framework {
namespace ir {
// There may be many transpose-flatten structures in a model, and the output of
// these structures will be used as inputs to the concat Op. This pattern will
// be detected by our pass. The times here represents the repeat times of this
// structure.
class SimplifyAnakinDetectionPatternPass : public FusePassBase {
public:
virtual ~SimplifyAnakinDetectionPatternPass() {}
protected:
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
...@@ -65,7 +65,7 @@ class SubGraphFuser {
using NodeInsideSubgraphTeller = SubgraphDetector::NodeInsideSubgraphTeller;
SubGraphFuser(Graph *graph, const NodeInsideSubgraphTeller &teller,
int min_subgraph_size, std::string name = "anakin_engine")
int min_subgraph_size, std::string name = "tensorrt_engine")
: graph_(graph),
node_inside_subgraph_teller_(teller),
min_subgraph_size_{min_subgraph_size},
...
...@@ -30,10 +30,6 @@ if (TENSORRT_FOUND)
add_subdirectory(tensorrt)
endif()
if (ANAKIN_SUBGRAPH)
add_subdirectory(anakin)
endif()
if (WITH_LITE)
add_subdirectory(lite)
endif()
...@@ -68,9 +64,6 @@ if(NOT APPLE)
set_target_properties(paddle_fluid PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
endif()
if(ANAKIN_FOUND)
set(ANAKIN_SHARED_INFERENCE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/api/api_anakin_engine.cc)
endif()
set(SHARED_INFERENCE_SRCS
io.cc
${CMAKE_CURRENT_SOURCE_DIR}/../framework/data_feed.cc
...@@ -80,8 +73,7 @@ set(SHARED_INFERENCE_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc
${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc
${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc
${mkldnn_quantizer_src_file}
${mkldnn_quantizer_src_file})
${ANAKIN_SHARED_INFERENCE_SRCS})
# Create shared inference library defaultly
cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
...
cc_library(anakin_engine SRCS engine.cc DEPS framework_proto boost)
cc_library(anakin_op_teller SRCS op_teller.cc DEPS framework_proto device_context boost)
target_link_libraries(anakin_engine anakin anakin_saber_common)
cc_test(test_anakin_engine SRCS test_anakin_engine.cc DEPS anakin_engine)
add_subdirectory(convert)
cc_library(anakin_op_converter SRCS fc.cc conv2d.cc conv2d_fusion.cc
elementwise.cc activation.cc pool2d.cc concat.cc split.cc relu.cc softmax.cc
batch_norm.cc reshape.cc flatten.cc transpose.cc density_prior_box.cc
detection_out.cc scale.cc dropout.cc im2sequence.cc sum.cc affine_channel.cc
roi_align.cc shuffle_channel.cc helper.cc DEPS anakin_engine framework_proto
scope op_registry gtest gflags)
cc_test(test_anakin_fc SRCS test_fc_op.cc DEPS anakin_op_converter mul_op)
cc_test(test_anakin_conv2d SRCS test_conv2d_op.cc DEPS anakin_op_converter conv_op im2col vol2col depthwise_conv)
cc_test(test_anakin_activation SRCS test_activation_op.cc DEPS activation_op anakin_op_converter)
cc_test(test_anakin_pool2d SRCS test_pool2d_op.cc DEPS anakin_op_converter pool_op pooling)
cc_test(test_anakin_concat SRCS test_concat_op.cc DEPS anakin_op_converter concat_op concat_and_split)
cc_test(test_anakin_split SRCS test_split_op.cc DEPS anakin_op_converter split_op concat_and_split)
cc_test(test_anakin_elementwise SRCS test_elementwise_op.cc DEPS anakin_op_converter elementwise_add_op elementwise_mul_op)
cc_test(test_anakin_relu SRCS test_relu_op.cc DEPS activation_op anakin_op_converter)
cc_test(test_anakin_softmax SRCS test_softmax_op.cc DEPS anakin_op_converter softmax_op softmax)
cc_test(test_anakin_reshape SRCS test_reshape_op.cc DEPS anakin_op_converter reshape_op)
cc_test(test_anakin_flatten SRCS test_flatten_op.cc DEPS anakin_op_converter flatten_op reshape_op)
cc_test(test_anakin_transpose SRCS test_transpose_op.cc DEPS anakin_op_converter transpose_op)
cc_test(test_anakin_batch_norm SRCS test_batch_norm_op.cc DEPS anakin_op_converter batch_norm_op)
cc_test(test_anakin_dropout SRCS test_dropout_op.cc DEPS anakin_op_converter dropout_op)
cc_test(test_anakin_sum SRCS test_sum_op.cc DEPS anakin_op_converter sum_op selected_rows_functor)
cc_test(test_anakin_affine_channel SRCS test_affine_channel_op.cc DEPS anakin_op_converter affine_channel_op)
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/activation.h"
#include <algorithm>
#include <map>
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
ActivationOpConverter<TargetT, PrecisionT>::ActivationOpConverter(
const std::string &op_type)
: op_type_(op_type) {
auto it = anakin_op_types_.find(op_type_);
PADDLE_ENFORCE(it != anakin_op_types_.end(),
"activation op type is not support");
anakin_op_type_ = it->second;
}
template <typename TargetT, ::anakin::Precision PrecisionT>
void ActivationOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto input_name = op_desc.Input("X").front();
auto output_name = op_desc.Output("Out").front();
this->engine_->AddOp(op_name, "Activation", {input_name}, {output_name});
this->engine_->AddOpAttr(op_name, "type", anakin_op_type_);
if (op_type_ == "swish") {
float beta = boost::get<float>(op_desc.GetAttr("beta"));
this->engine_->AddOpAttr(op_name, "clip_relu_num", beta);
}
if (op_type_ == "relu6") {
float threshold = boost::get<float>(op_desc.GetAttr("threshold"));
this->engine_->AddOpAttr(op_name, "clip_relu_num", threshold);
}
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(sigmoid, SigmoidOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(tanh, TanhOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(swish, SwishOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(relu6, Relu6OpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class ActivationOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
explicit ActivationOpConverter(const std::string &op_type);
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~ActivationOpConverter() {}
private:
std::string op_type_;
std::string anakin_op_type_;
std::map<std::string, std::string> anakin_op_types_{{"tanh", "TanH"},
{"sigmoid", "Sigmoid"},
{"relu6", "ClippedRelu"},
{"swish", "Swish"}};
};
template <typename TargetT, ::anakin::Precision PrecisionT>
class TanhOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
public:
TanhOpConverter() : ActivationOpConverter<TargetT, PrecisionT>("tanh") {}
};
template <typename TargetT, ::anakin::Precision PrecisionT>
class SigmoidOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
public:
SigmoidOpConverter()
: ActivationOpConverter<TargetT, PrecisionT>("sigmoid") {}
};
template <typename TargetT, ::anakin::Precision PrecisionT>
class Relu6OpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
public:
Relu6OpConverter() : ActivationOpConverter<TargetT, PrecisionT>("relu6") {}
};
template <typename TargetT, ::anakin::Precision PrecisionT>
class SwishOpConverter : public ActivationOpConverter<TargetT, PrecisionT> {
public:
SwishOpConverter() : ActivationOpConverter<TargetT, PrecisionT>("swish") {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/affine_channel.h"
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void AffineChannelOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto input_name = op_desc.Input("X").front();
auto output_name = op_desc.Output("Out").front();
this->engine_->AddOp(op_name, "AffineChannel", {input_name}, {output_name});
// Copy the Scale to CPUPlace and get the pointer.
auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
PADDLE_ENFORCE_NOT_NULL(scale_v);
auto weight1 = pblock_from_var<TargetT, PrecisionT>(*scale_v, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
// Copy the Bias to CPUPlace and get the pointer.
auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
PADDLE_ENFORCE_NOT_NULL(bias_v);
auto weight2 = pblock_from_var<TargetT, PrecisionT>(*bias_v, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(affine_channel, AffineChannelOpConverter);
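The removed affine_channel converter attached the Scale and Bias variables as the two weights of Anakin's AffineChannel op. As a reference for what the op computes, here is a tiny per-channel affine sketch (plain C++, assuming a CHW layout; not the Anakin API):

#include <cassert>
#include <vector>

// out[c][i] = scale[c] * x[c][i] + bias[c], applied independently per channel.
std::vector<float> AffineChannel(const std::vector<float>& x, int channels, int spatial,
                                 const std::vector<float>& scale,
                                 const std::vector<float>& bias) {
  std::vector<float> out(x.size());
  for (int c = 0; c < channels; ++c) {
    for (int i = 0; i < spatial; ++i) {
      out[c * spatial + i] = scale[c] * x[c * spatial + i] + bias[c];
    }
  }
  return out;
}

int main() {
  const std::vector<float> x = {1.f, 2.f, 3.f, 4.f};  // 2 channels x 2 elements
  const auto out = AffineChannel(x, 2, 2, {2.f, 0.5f}, {1.f, 0.f});
  assert(out[0] == 3.f && out[3] == 2.f);
  return 0;
}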
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class AffineChannelOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
AffineChannelOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~AffineChannelOpConverter() {}
private:
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/batch_norm.h"
#include <math.h>
#include <algorithm>
#include <map>
#include <string>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void BatchNormOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1);
std::map<std::string, std::string> inputs;
for (auto k : {"X", "Scale", "Bias", "Mean", "Variance"}) {
PADDLE_ENFORCE_EQ(op_desc.Input(k).size(), 1UL);
}
auto input = op_desc.Input("X").front();
auto output = op_desc.Output("Y").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Y").front();
auto epsilon = boost::get<float>(op_desc.GetAttr("epsilon"));
auto bn_op_name = op_name + ":bn";
auto bn_output = bn_op_name + "_output";
this->engine_->AddOp(bn_op_name, "BatchNorm", {input}, {bn_output});
this->engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
this->engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));
auto scale_op_name = op_name + ":scale";
this->engine_->AddOp(scale_op_name, "Scale", {bn_output}, {output});
this->engine_->AddOpAttr(scale_op_name, "axis", 1);
this->engine_->AddOpAttr(scale_op_name, "num_axes", 1);
this->engine_->AddOpAttr(scale_op_name, "bias_term", true);
auto *mean_v = scope.FindVar(op_desc.Input("Mean").front());
PADDLE_ENFORCE_NOT_NULL(mean_v);
auto weight1 = pblock_from_var<TargetT, PrecisionT>(*mean_v, this->engine_);
this->engine_->AddOpAttr(bn_op_name, "weight_1", *weight1);
auto *variance_v = scope.FindVar(op_desc.Input("Variance").front());
PADDLE_ENFORCE_NOT_NULL(variance_v);
auto weight2 =
pblock_from_var<TargetT, PrecisionT>(*variance_v, this->engine_);
this->engine_->AddOpAttr(bn_op_name, "weight_2", *weight2);
auto *weight3 = pblock_from_vector<TargetT, PrecisionT>(
std::vector<float>({1}), this->engine_);
this->engine_->AddOpAttr(bn_op_name, "weight_3", *weight3);
auto *scale_v = scope.FindVar(op_desc.Input("Scale").front());
PADDLE_ENFORCE_NOT_NULL(scale_v);
auto scale = pblock_from_var<TargetT, PrecisionT>(*scale_v, this->engine_);
this->engine_->AddOpAttr(scale_op_name, "weight_1", *scale);
auto *bias_v = scope.FindVar(op_desc.Input("Bias").front());
PADDLE_ENFORCE_NOT_NULL(bias_v);
auto bias = pblock_from_var<TargetT, PrecisionT>(*bias_v, this->engine_);
this->engine_->AddOpAttr(scale_op_name, "weight_2", *bias);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(batch_norm, BatchNormOpConverter);
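The removed batch_norm converter split the op into an Anakin BatchNorm step (fed Mean/Variance) followed by a Scale step (fed Scale/Bias). A minimal numeric sketch of why the two-step form matches the fused formula (plain C++, not the engine API):

#include <cassert>
#include <cmath>

int main() {
  // One element of one channel; the same algebra holds elementwise.
  const float x = 3.f, mean = 1.f, variance = 4.f, epsilon = 1e-5f;
  const float scale = 0.5f, bias = 0.25f;

  // Fused batch_norm formula.
  const float fused = scale * (x - mean) / std::sqrt(variance + epsilon) + bias;

  // Converter's two-step form: normalize first, then apply the affine Scale op.
  const float normalized = (x - mean) / std::sqrt(variance + epsilon);
  const float two_step = scale * normalized + bias;

  assert(std::fabs(fused - two_step) < 1e-6f);
  return 0;
}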
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class BatchNormOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
BatchNormOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~BatchNormOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/concat.h"
#include <algorithm>
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void ConcatOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
int axis = boost::get<int>(op_desc.GetAttr("axis"));
auto input_names = op_desc.Input("X");
auto y_name = op_desc.Output("Out").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
this->engine_->AddOp(op_name, "Concat", input_names, {y_name});
this->engine_->AddOpAttr(op_name, "axis", axis);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(concat, ConcatOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class ConcatOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
ConcatOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~ConcatOpConverter() {}
private:
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/conv2d.h"
#include <algorithm>
#include <memory>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void Conv2dOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL);
auto input_name = op_desc.Input("Input").front();
auto output_name = op_desc.Output("Output").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front();
this->engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
PADDLE_ENFORCE_NOT_NULL(filter_v);
auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
auto weight_shape = framework::vectorize<int>(weight_tensor->dims());
PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
const int filter_h = weight_tensor->dims()[2];
const int filter_w = weight_tensor->dims()[3];
auto filter_num = weight_tensor->dims()[0];
this->engine_->template AddOpAttr<int>(op_name, "filter_num", filter_num);
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "kernel_size",
{filter_h, filter_w});
auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dilation_rate",
dilations);
const int groups = boost::get<int>(op_desc.GetAttr("groups"));
this->engine_->AddOpAttr(op_name, "group", groups);
this->engine_->AddOpAttr(op_name, "axis", 1);
this->engine_->AddOpAttr(op_name, "bias_term", false);
::anakin::saber::Shape anakin_shape(weight_shape);
bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
if (enable_int8) {
const float int8_range = 127.;
float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
auto weight_scale =
boost::get<std::vector<float>>(op_desc.GetAttr("weight_scale"));
PBlock<TargetT> *weight1 =
new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
this->engine_->RegistBlock(weight1);
float *weight_data = weight_tensor->data<float>();
std::vector<char> weight_int8;
int weight_num = weight_tensor->numel();
for (int i = 0; i < weight_tensor->numel(); i++) {
bool is_valid_int8 =
((weight_data[i] >= -128) && (weight_data[i] <= 127));
PADDLE_ENFORCE(is_valid_int8,
"We are in anakin subgraph int8 mode, the weight of conv "
"should be in range [-128, 127]");
weight_int8.push_back(static_cast<char>(weight_data[i]));
}
memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
static_cast<void *>(weight_int8.data()), sizeof(char) * weight_num);
weight1->d_tensor().set_shape(anakin_shape);
weight1->d_tensor().copy_from(weight1->h_tensor());
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
this->engine_->Graph()->SetWeightsScale(
op_name, {weight_scale[0] / int8_range}, false);
this->engine_->AddTensorScale(input_name, in_scale / int8_range);
} else {
auto *weight1 = pblock_from_tensor<TargetT, PrecisionT>(
*weight_tensor, weight_shape, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
}
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(conv2d, Conv2dOpConverter);
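In the int8 branch above, the conv weights are expected to already hold integer values in [-128, 127]; they are narrowed to 8-bit storage and the calibration scales are divided by the int8 range (127) before being handed to the engine. A small self-contained sketch of that narrowing (hypothetical helper, plain C++, not the Anakin API):

#include <cassert>
#include <cstdint>
#include <vector>

struct Int8ConvWeights {
  std::vector<int8_t> data;  // narrowed weights, as copied into the engine block
  float weight_scale;        // per-tensor weight scale divided by 127
  float input_scale;         // input tensor scale divided by 127
};

Int8ConvWeights NarrowConvWeights(const std::vector<float>& weights,
                                  float weight_scale, float input_scale) {
  const float kInt8Range = 127.f;
  Int8ConvWeights out;
  out.data.reserve(weights.size());
  for (float w : weights) {
    // Mirrors the PADDLE_ENFORCE range check in the converter above.
    assert(w >= -128.f && w <= 127.f);
    out.data.push_back(static_cast<int8_t>(w));
  }
  out.weight_scale = weight_scale / kInt8Range;
  out.input_scale = input_scale / kInt8Range;
  return out;
}

int main() {
  const Int8ConvWeights q = NarrowConvWeights({-128.f, 0.f, 127.f}, 0.02f, 0.5f);
  assert(q.data.size() == 3 && q.data[0] == -128 && q.data[2] == 127);
  return 0;
}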
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class Conv2dOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
Conv2dOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~Conv2dOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/conv2d_fusion.h"
#include <algorithm>
#include <memory>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void Conv2dFusionOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL);
auto input_name = op_desc.Input("Input").front();
auto output_name = op_desc.Output("Output").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Output").front();
this->engine_->AddOp(op_name, "Convolution", {input_name}, {output_name});
auto *filter_v = scope.FindVar(op_desc.Input("Filter").front());
PADDLE_ENFORCE_NOT_NULL(filter_v);
auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
auto weight_shape = framework::vectorize<int>(weight_tensor->dims());
auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
PADDLE_ENFORCE_NOT_NULL(b_v);
PADDLE_ENFORCE_EQ(weight_tensor->dims().size(), 4UL);
const int filter_h = weight_tensor->dims()[2];
const int filter_w = weight_tensor->dims()[3];
auto filter_num = weight_tensor->dims()[0];
this->engine_->template AddOpAttr<int>(op_name, "filter_num", filter_num);
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "kernel_size",
{filter_h, filter_w});
auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
auto dilations = boost::get<std::vector<int>>(op_desc.GetAttr("dilations"));
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dilation_rate",
dilations);
const int groups = boost::get<int>(op_desc.GetAttr("groups"));
this->engine_->AddOpAttr(op_name, "group", groups);
this->engine_->AddOpAttr(op_name, "axis", 1);
this->engine_->AddOpAttr(op_name, "bias_term", true);
::anakin::saber::Shape anakin_shape(weight_shape);
bool enable_int8 = boost::get<bool>(op_desc.HasAttr("enable_int8"));
if (enable_int8) {
const float int8_range = 127.;
float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
auto weight_scale =
boost::get<std::vector<float>>(op_desc.GetAttr("weight_scale"));
PBlock<TargetT> *weight1 =
new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
this->engine_->RegistBlock(weight1);
float *weight_data = weight_tensor->data<float>();
std::vector<char> weight_int8;
int weight_num = weight_tensor->numel();
for (int i = 0; i < weight_tensor->numel(); i++) {
bool is_valid_int8 =
((weight_data[i] >= -128) && (weight_data[i] <= 127));
PADDLE_ENFORCE(is_valid_int8,
"We are in anakin subgraph int8 mode, the weight of conv "
"should be in range [-128, 127]");
weight_int8.push_back(static_cast<char>(weight_data[i]));
}
memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
static_cast<void *>(weight_int8.data()), sizeof(char) * weight_num);
weight1->d_tensor().set_shape(anakin_shape);
weight1->d_tensor().copy_from(weight1->h_tensor());
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
this->engine_->Graph()->SetWeightsScale(
op_name, {weight_scale[0] / int8_range}, false);
this->engine_->AddTensorScale(input_name, in_scale / int8_range);
} else {
auto weight_tensor = tensor_from_var(*filter_v, platform::CPUPlace());
auto weight_shape = framework::vectorize<int>(weight_tensor->dims());
auto *weight1 = pblock_from_tensor<TargetT, PrecisionT>(
*weight_tensor, weight_shape, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
auto weight2 = pblock_from_var<TargetT, PrecisionT>(*b_v, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
}
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(conv2d_fusion, Conv2dFusionOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class Conv2dFusionOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
Conv2dFusionOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~Conv2dFusionOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/density_prior_box.h"
#include <algorithm>
#include <map>
#include <vector>
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void DensityPriorBoxOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc& op, const framework::BlockDesc& block_desc,
const framework::Scope& scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
auto input_name = op_desc.Input("Input").front();
auto image_name = op_desc.Input("Image").front();
auto output_name = op_desc.Output("Boxes").front();
auto op_type = op_desc.Type();
auto op_name = op_type + ":" + op_desc.Output("Boxes").front();
  // Attributes for density_prior_box and prior_box; only those matching the
  // actual op type are filled in below.
std::vector<float> fixed_sizes = {};
std::vector<float> fixed_ratios = {};
std::vector<int> densities = {};
std::vector<float> min_sizes = {};
std::vector<float> max_sizes = {};
std::vector<float> aspect_ratios = {};
bool is_clip = false;
bool is_flip = false;
if (op_type == "density_prior_box") {
fixed_sizes =
boost::get<std::vector<float>>(op_desc.GetAttr("fixed_sizes"));
fixed_ratios =
boost::get<std::vector<float>>(op_desc.GetAttr("fixed_ratios"));
densities = boost::get<std::vector<int>>(op_desc.GetAttr("densities"));
is_clip = boost::get<bool>(op_desc.GetAttr("clip"));
} else if (op_type == "prior_box") {
min_sizes = boost::get<std::vector<float>>(op_desc.GetAttr("min_sizes"));
max_sizes = boost::get<std::vector<float>>(op_desc.GetAttr("max_sizes"));
aspect_ratios =
boost::get<std::vector<float>>(op_desc.GetAttr("aspect_ratios"));
is_clip = boost::get<bool>(op_desc.GetAttr("clip"));
is_flip = boost::get<bool>(op_desc.GetAttr("flip"));
}
std::vector<float> dens;
for (auto& ele : densities) {
dens.push_back(static_cast<float>(ele));
}
auto variances = boost::get<std::vector<float>>(op_desc.GetAttr("variances"));
  // The Paddle op does not provide img_h / img_w, so both are set to 0 below.
auto step_h = boost::get<float>(op_desc.GetAttr("step_h"));
auto step_w = boost::get<float>(op_desc.GetAttr("step_w"));
auto offset = boost::get<float>(op_desc.GetAttr("offset"));
PTuple<std::string> t_order;
t_order.push_back("MIN");
t_order.push_back("COM");
t_order.push_back("MAX");
std::vector<float> temp_v = {};
this->engine_->AddOp(op_name, "PriorBox", {input_name, image_name},
{output_name});
this->engine_->template AddOpAttr<PTuple<float>>(op_name, "min_size",
min_sizes);
this->engine_->template AddOpAttr<PTuple<float>>(op_name, "max_size",
max_sizes);
this->engine_->template AddOpAttr<PTuple<float>>(op_name, "aspect_ratio",
aspect_ratios);
this->engine_->template AddOpAttr<PTuple<float>>(op_name, "fixed_size",
fixed_sizes);
this->engine_->template AddOpAttr<PTuple<float>>(op_name, "fixed_ratio",
fixed_ratios);
this->engine_->template AddOpAttr<PTuple<float>>(op_name, "density", dens);
this->engine_->AddOpAttr(op_name, "is_flip", is_flip);
this->engine_->AddOpAttr(op_name, "is_clip", is_clip);
this->engine_->template AddOpAttr<PTuple<float>>(op_name, "variance",
variances);
this->engine_->AddOpAttr(op_name, "img_h", static_cast<int>(0));
this->engine_->AddOpAttr(op_name, "img_w", static_cast<int>(0));
this->engine_->AddOpAttr(op_name, "step_h", step_h);
this->engine_->AddOpAttr(op_name, "step_w", step_w);
this->engine_->AddOpAttr(op_name, "offset", offset);
this->engine_->template AddOpAttr<PTuple<std::string>>(op_name, "order",
t_order);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(density_prior_box, DensityPriorBoxOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(prior_box, DensityPriorBoxOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class DensityPriorBoxOpConverter
: public AnakinOpConverter<TargetT, PrecisionT> {
public:
DensityPriorBoxOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~DensityPriorBoxOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/detection_out.h"
#include <algorithm>
#include <map>
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void DetectionOutOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
auto target_name = op_desc.Input("TargetBox").front();
auto prior_box_name = op_desc.Input("PriorBox").front();
auto scores_name = op_desc.Input("Scores").front();
auto output_name = op_desc.Output("Out").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto code_type = boost::get<std::string>(op_desc.GetAttr("code_type"));
auto background_label = boost::get<int>(op_desc.GetAttr("background_label"));
auto score_threshold = boost::get<float>(op_desc.GetAttr("score_threshold"));
auto nms_top_k = boost::get<int>(op_desc.GetAttr("nms_top_k"));
auto nms_threshold = boost::get<float>(op_desc.GetAttr("nms_threshold"));
auto nms_eta = boost::get<float>(op_desc.GetAttr("nms_eta"));
auto keep_top_k = boost::get<int>(op_desc.GetAttr("keep_top_k"));
std::string anakin_code_type;
if (code_type == "decode_center_size") {
anakin_code_type = "CENTER_SIZE";
} else if (code_type == "encode_center_size") {
    PADDLE_THROW(
        "The encode_center_size code_type is not supported by the anakin "
        "DetectionOut converter");
}
this->engine_->AddOp(op_name, "DetectionOutput",
{target_name, scores_name, prior_box_name},
{output_name});
this->engine_->AddOpAttr(op_name, "share_location", true);
this->engine_->AddOpAttr(op_name, "variance_encode_in_target", false);
this->engine_->AddOpAttr(op_name, "class_num", static_cast<int>(0));
this->engine_->AddOpAttr(op_name, "background_id", background_label);
this->engine_->AddOpAttr(op_name, "keep_top_k", keep_top_k);
this->engine_->AddOpAttr(op_name, "code_type", anakin_code_type);
this->engine_->AddOpAttr(op_name, "conf_thresh", score_threshold);
this->engine_->AddOpAttr(op_name, "nms_top_k", nms_top_k);
this->engine_->AddOpAttr(op_name, "nms_thresh", nms_threshold);
this->engine_->AddOpAttr(op_name, "nms_eta", nms_eta);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(detection_out, DetectionOutOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class DetectionOutOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
DetectionOutOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~DetectionOutOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/dropout.h"
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void DropoutOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Mask").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto x_name = op_desc.Input("X").front();
auto out_name = op_desc.Output("Out").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
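  // At inference time dropout reduces to scaling the input by
  // (1 - dropout_prob), so it is lowered to anakin's Scale op whose
  // single-element weight holds that factor.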
this->engine_->AddOp(op_name, "Scale", {x_name}, {out_name});
auto dropout_prob = boost::get<float>(op_desc.GetAttr("dropout_prob"));
auto factor = 1 - dropout_prob;
auto *weight1 = pblock_from_vector<TargetT, PrecisionT>(
std::vector<float>({factor}), this->engine_);
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
this->engine_->AddOpAttr(op_name, "axis", 0);
this->engine_->AddOpAttr(op_name, "num_axes", 0);
this->engine_->AddOpAttr(op_name, "bias_term", false);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(dropout, DropoutOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class DropoutOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
DropoutOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~DropoutOpConverter() {}
private:
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/elementwise.h"
#include <algorithm>
#include <string>
#include <vector>
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void ElementwiseAddOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto x_name = op_desc.Input("X").front();
auto y_name = op_desc.Input("Y").front();
auto out_name = op_desc.Output("Out").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
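  // elementwise_add is lowered to anakin's Eltwise op in "Add" mode with unit
  // coefficients for both inputs.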
this->engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name});
std::string elementwise_type = "Add";
this->engine_->template AddOpAttr<std::string>(op_name, "type",
elementwise_type);
std::vector<float> coeff = {1.0, 1.0};
this->engine_->template AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
}
template <typename TargetT, ::anakin::Precision PrecisionT>
void ElementwiseMulOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto x_name = op_desc.Input("X").front();
auto y_name = op_desc.Input("Y").front();
auto out_name = op_desc.Output("Out").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
this->engine_->AddOp(op_name, "Eltwise", {x_name, y_name}, {out_name});
std::string elementwise_type = "Mul";
this->engine_->template AddOpAttr<std::string>(op_name, "type",
elementwise_type);
std::vector<float> coeff = {1.0, 1.0};
this->engine_->template AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(elementwise_add, ElementwiseAddOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(elementwise_mul, ElementwiseMulOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class ElementwiseAddOpConverter
: public AnakinOpConverter<TargetT, PrecisionT> {
public:
ElementwiseAddOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~ElementwiseAddOpConverter() {}
private:
};
template <typename TargetT, ::anakin::Precision PrecisionT>
class ElementwiseMulOpConverter
: public AnakinOpConverter<TargetT, PrecisionT> {
public:
ElementwiseMulOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~ElementwiseMulOpConverter() {}
private:
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/fc.h"
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/inference/anakin/convert/helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void FcBaseOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
auto input_names = op_desc.InputNames();
bool with_bias = input_names.size() >= 3;
std::string w_name = "Y";
std::string i_name = "X";
if (with_bias) {
w_name = "W";
i_name = "Input";
}
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
// get weights
auto *y_v = scope.FindVar(op_desc.Input(w_name).front());
PADDLE_ENFORCE_NOT_NULL(y_v);
auto weight_tensor = tensor_from_var(*y_v, platform::CPUPlace());
auto weight_shape = framework::vectorize<int>(weight_tensor->dims());
int out_dim = weight_shape[1];
const int w_m = weight_shape[0];
const int w_k = weight_shape[1];
auto input_name = op_desc.Input(i_name).front();
auto output_name = op_desc.Output("Out").front();
this->engine_->AddOp(op_name, "Dense", {input_name}, {output_name});
this->engine_->AddOpAttr(op_name, "bias_term", with_bias);
this->engine_->AddOpAttr(op_name, "axis", 1);
this->engine_->AddOpAttr(op_name, "out_dim", out_dim);
auto *weight_data = weight_tensor->data<float>();
PADDLE_ENFORCE(w_m * w_k == weight_tensor->numel());
std::vector<float> trans_weight_data(weight_tensor->numel());
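  // Paddle stores the fc/mul weight as a row-major [w_m, w_k] matrix; it is
  // transposed here into trans_weight_data, which is assumed to be the layout
  // anakin's Dense op expects.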
for (int i = 0; i < w_m; i++) {
for (int j = 0; j < w_k; j++) {
trans_weight_data[i + j * w_m] = weight_data[i * w_k + j];
}
}
int weight_num = weight_tensor->numel();
  bool enable_int8 = op_desc.HasAttr("enable_int8");
if (enable_int8) {
if (weight_shape.size() < 4UL) {
weight_shape.insert(weight_shape.begin(), 4UL - weight_shape.size(), 1);
}
::anakin::saber::Shape anakin_shape(weight_shape);
const float int8_range = 127.;
float in_scale = boost::get<float>(op_desc.GetAttr("input_scale"));
auto weight_scale =
boost::get<std::vector<float>>(op_desc.GetAttr("weight_scale"));
PBlock<TargetT> *weight1 =
new PBlock<TargetT>(anakin_shape, ::anakin::AK_INT8);
this->engine_->RegistBlock(weight1);
std::vector<char> weight_int8;
for (int i = 0; i < weight_num; i++) {
bool is_valid_int8 =
((trans_weight_data[i] >= -128) && (trans_weight_data[i] <= 127));
      PADDLE_ENFORCE(is_valid_int8,
                     "In anakin subgraph int8 mode, the fc weight "
                     "should be in the range [-128, 127]");
weight_int8.push_back(static_cast<char>(trans_weight_data[i]));
}
memcpy(static_cast<void *>(weight1->h_tensor().mutable_data()),
static_cast<void *>(weight_int8.data()), sizeof(char) * weight_num);
weight1->d_tensor().set_shape(anakin_shape);
weight1->d_tensor().copy_from(weight1->h_tensor());
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
this->engine_->Graph()->SetOpPrec(op_name, ::anakin::AK_INT8);
this->engine_->Graph()->SetWeightsScale(
op_name, {weight_scale[0] / int8_range}, false);
this->engine_->AddTensorScale(input_name, in_scale / int8_range);
} else {
auto *weight1 = pblock_from_vector<TargetT, PrecisionT>(trans_weight_data,
this->engine_);
this->engine_->AddOpAttr(op_name, "weight_1", *weight1);
}
// get bias
if (with_bias) {
auto *b_v = scope.FindVar(op_desc.Input("Bias").front());
PADDLE_ENFORCE_NOT_NULL(b_v);
auto weight2 = pblock_from_var<TargetT, PrecisionT>(*b_v, this->engine_);
this->engine_->AddOpAttr(op_name, "weight_2", *weight2);
}
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(mul, MulOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(fc, FcOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class FcBaseOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
FcBaseOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~FcBaseOpConverter() {}
};
// with bias
template <typename TargetT, ::anakin::Precision PrecisionT>
class FcOpConverter : public FcBaseOpConverter<TargetT, PrecisionT> {
public:
FcOpConverter() = default;
};
// without bias
template <typename TargetT, ::anakin::Precision PrecisionT>
class MulOpConverter : public FcBaseOpConverter<TargetT, PrecisionT> {
public:
MulOpConverter() = default;
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/flatten.h"
#include <vector>
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void FlattenOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL);
auto input = op_desc.Input("X").front();
auto output = op_desc.Output("Out").front();
int axis = boost::get<int>(op_desc.GetAttr("axis"));
  PADDLE_ENFORCE(axis == 1,
                 "the anakin flatten op converter currently only supports axis == 1.");
std::vector<int> out_dims = {0, -1, 1, 1};
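  // The reshape dims {0, -1, 1, 1} flatten everything after the batch axis;
  // the leading 0 is assumed to tell anakin's Reshape to keep the input's
  // batch dimension unchanged.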
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
this->engine_->AddOp(op_name, "Reshape", {input}, {output});
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dims", out_dims);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(flatten, FlattenOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class FlattenOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
FlattenOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~FlattenOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/helper.h"
namespace paddle {
namespace inference {
namespace anakin {
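// Synchronously copy the LoDTensor held by `var` to `place` and return the
// newly allocated copy.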
std::unique_ptr<framework::LoDTensor> tensor_from_var(
const framework::Variable& var, const platform::Place& place) {
auto& src = var.Get<framework::LoDTensor>();
std::unique_ptr<framework::LoDTensor> dst(new framework::LoDTensor());
dst->Resize(src.dims());
  TensorCopySync(src, place, dst.get());
return dst;
}
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <map>
#include <memory>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "framework/core/net/net.h"
#include "framework/core/types.h"
#include "framework/graph/graph.h"
#include "framework/graph/graph_global_mem.h"
#include "saber/saber_types.h"
using anakin::saber::Shape;
using anakin::AK_FLOAT;
using anakin::AK_INT8;
using anakin::PBlock;
namespace paddle {
namespace inference {
namespace anakin {
std::unique_ptr<framework::LoDTensor> tensor_from_var(
const framework::Variable& var, const platform::Place& place);
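// Build an anakin PBlock from a float LoDTensor: the shape is padded to 4-D,
// the data is copied into the block's host tensor and mirrored to the device
// tensor; the block is registered with the engine via RegistBlock.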
template <typename TargetT, ::anakin::Precision PrecisionT>
PBlock<TargetT>* pblock_from_tensor(const framework::LoDTensor& tensor,
std::vector<int> shape_vec,
AnakinEngine<TargetT, PrecisionT>* engine) {
while (shape_vec.size() < 4) {
shape_vec.insert(shape_vec.begin(), 1);
}
Shape shape(shape_vec);
PBlock<TargetT>* weight = new PBlock<TargetT>(shape, AK_FLOAT);
engine->RegistBlock(weight);
float* cpu_data = static_cast<float*>(weight->h_tensor().mutable_data());
std::copy_n(tensor.data<float>(), tensor.numel(), cpu_data);
weight->d_tensor().set_shape(shape);
weight->d_tensor().copy_from(weight->h_tensor());
return weight;
}
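// Same as above, but sourced from a plain float vector with an explicitly
// given shape (padded to 4-D).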
template <typename TargetT, ::anakin::Precision PrecisionT>
PBlock<TargetT>* pblock_from_vector(const std::vector<float>& vec,
std::vector<int> shape_vec,
AnakinEngine<TargetT, PrecisionT>* engine) {
while (shape_vec.size() < 4) {
shape_vec.insert(shape_vec.begin(), 1);
}
Shape shape(shape_vec);
PBlock<TargetT>* weight = new PBlock<TargetT>(shape, AK_FLOAT);
engine->RegistBlock(weight);
auto* weight_data = static_cast<float*>(weight->h_tensor().mutable_data());
std::copy(std::begin(vec), std::end(vec), weight_data);
weight->d_tensor().set_shape(shape);
weight->d_tensor().copy_from(weight->h_tensor());
return weight;
}
template <typename TargetT, ::anakin::Precision PrecisionT>
PBlock<TargetT>* pblock_from_vector(const std::vector<float>& vec,
AnakinEngine<TargetT, PrecisionT>* engine) {
int size = vec.size();
return pblock_from_vector<TargetT, PrecisionT>(
vec, std::vector<int>({1, 1, 1, size}), engine);
}
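// Convenience wrapper: copy the variable's tensor to CPU, then build the
// PBlock using the tensor's own shape.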
template <typename TargetT, ::anakin::Precision PrecisionT>
PBlock<TargetT>* pblock_from_var(const framework::Variable& var,
AnakinEngine<TargetT, PrecisionT>* engine) {
auto tensor = tensor_from_var(var, platform::CPUPlace());
auto shape = framework::vectorize<int>(tensor->dims());
return pblock_from_tensor<TargetT, PrecisionT>(*tensor, shape, engine);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/im2sequence.h"
#include <algorithm>
#include <string>
#include <vector>
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void Im2SequenceConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 0);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto x_name = op_desc.Input("X").front();
auto out_name = op_desc.Output("Out").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
this->engine_->AddOp(op_name, "Im2Sequence", {x_name}, {out_name});
std::vector<int> dilations = {1, 1};
auto paddings = boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
auto strides = boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
auto kernels = boost::get<std::vector<int>>(op_desc.GetAttr("kernels"));
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "paddings", paddings);
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "window_size",
kernels);
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dilations",
dilations);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(im2sequence, Im2SequenceConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class Im2SequenceConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
Im2SequenceConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~Im2SequenceConverter() {}
private:
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "framework/core/types.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "saber/saber_types.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class AnakinOpConverter {
using AnakinEngineT = AnakinEngine<TargetT, PrecisionT>;
public:
AnakinOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {}
void ConvertOp(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const std::unordered_set<std::string> &parameters,
const framework::Scope &scope, AnakinEngineT *engine,
bool test_mode = false) {
framework::OpDesc op_desc(op, nullptr);
std::string op_type = op_desc.Type();
AnakinOpConverter *it = nullptr;
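    // Several Paddle op variants share a converter, so normalize the op type
    // before looking it up in the converter registry.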
if (op_type == "depthwise_conv2d") op_type = "conv2d";
if (op_type == "reshape2") op_type = "reshape";
if (op_type == "transpose2") op_type = "transpose";
if (op_type == "flatten2") op_type = "flatten";
if (!it) {
it = Registry<AnakinOpConverter>::Global().Lookup(op_type);
}
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", op_type);
it->SetEngine(engine);
(*it)(op, block_desc, scope, test_mode);
}
void ConvertBlock(framework::BlockDesc *block_desc,
const std::unordered_set<std::string> &parameters,
const framework::Scope &scope, AnakinEngineT *engine) {
std::unique_lock<std::mutex> lock(mutex_);
framework::proto::BlockDesc *block = block_desc->Proto();
for (auto i = 0; i < block->ops_size(); i++) {
auto &op = block->ops(i);
ConvertOp(op, *block_desc, parameters, scope, engine);
}
}
  // The scope here should already be initialized with the parameter variables.
void ConvertBlockToAnakinEngine(
framework::BlockDesc *block_desc, framework::Scope *scope,
const std::vector<std::string> &inputs,
const std::unordered_set<std::string> &parameters,
const std::vector<std::string> &outputs, AnakinEngineT *engine) {
ConvertBlock(block_desc, parameters, *scope, engine);
    // The max_batch_size configured via config->EnableAnakinEngine must be positive.
int max_batch_size = engine->GetMaxBatchSize();
    PADDLE_ENFORCE(max_batch_size > 0,
                   "the max_batch_size set from config->EnableAnakinEngine "
                   "must be larger than 0");
    // If the user does not specify max_input_shape, the input shape recorded
    // in the block_desc is used instead.
auto max_input_shape = engine->GetMaxInputShape();
std::map<std::string, std::vector<int>> temp_max_input_shape;
// Register outputs with anakin using the RegistVar interface before Freeze.
// Note that RegistVar's parameters can only be outputs, not inputs.
for (auto &output : outputs) {
engine->Graph()->RegistVar(output);
}
engine->Freeze();
// Add scale for tensor in int8 mode.
auto tensor_scales = engine->GetTensorScales();
for (auto &item : tensor_scales) {
engine->Graph()->SetVarScale(item.first, item.second);
}
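    // Anakin inputs are fixed to 4-D shapes: the batch dimension comes from
    // max_batch_size, the remaining dims from max_input_shape when provided,
    // otherwise from the variable's shape recorded in the block_desc.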
for (auto &input : inputs) {
if (parameters.count(input)) continue;
std::vector<int> input_shape;
input_shape.resize(4);
input_shape[0] = max_batch_size;
if (max_input_shape.count(input)) {
        PADDLE_ENFORCE(max_input_shape[input].size() == 4,
                       "the dimensions of max_input_shape set from "
                       "config->EnableAnakinEngine must be 4");
for (int i = 1; i < 4; i++) {
input_shape[i] = max_input_shape[input][i];
}
} else {
auto *var = block_desc->FindVar(input);
PADDLE_ENFORCE(var, "no variable called %s", input);
auto var_shape = var->GetShape();
        VLOG(3) << "Anakin engine input: " << input;
PADDLE_ENFORCE(var_shape.size() == 4);
for (size_t i = 1; i < var_shape.size(); i++) {
input_shape[i] = var_shape[i];
}
}
temp_max_input_shape[input] = input_shape;
engine->SetInputShape(input, input_shape);
}
engine->SetMaxInputShape(temp_max_input_shape);
engine->Optimize();
engine->InitNet();
}
void SetEngine(AnakinEngineT *engine) { engine_ = engine; }
virtual ~AnakinOpConverter() {}
protected:
bool test_mode_;
AnakinEngineT *engine_{nullptr};
private:
std::unordered_map<std::string, AnakinOpConverter<TargetT, PrecisionT> *>
converters_;
framework::Scope *scope_{nullptr};
std::mutex mutex_;
};
template class AnakinOpConverter<::anakin::saber::NV,
::anakin::Precision::FP32>;
template class AnakinOpConverter<::anakin::saber::NV,
::anakin::Precision::INT8>;
#ifdef ANAKIN_X86_PLACE
template class AnakinOpConverter<::anakin::saber::X86,
::anakin::Precision::FP32>;
template class AnakinOpConverter<::anakin::saber::X86,
::anakin::Precision::INT8>;
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
#define REGISTER_ANAKIN_OP_CONVERTER_BASE(op_type__, Converter__, \
place_type__, place_class__, \
precision_type__, precision_class__) \
struct anakin_##op_type__##_##place_type__##_##precision_type__##_converter \
: public ::paddle::framework::Registrar { \
anakin_##op_type__##_##place_type__##_##precision_type__##_converter() { \
LOG(INFO) << "register convert " << #op_type__ << " "; \
::paddle::inference::Registry< \
::paddle::inference::anakin::AnakinOpConverter< \
place_class__, precision_class__>>::Global() \
.Register<Converter__>(#op_type__); \
} \
}; \
anakin_##op_type__##_##place_type__##_##precision_type__##_converter \
anakin_##op_type__##_##place_type__##_##precision_type__##_converter__; \
int Touch_anakin_##op_type__##_##place_type__##_##precision_type__() { \
anakin_##op_type__##_##place_type__##_##precision_type__##_converter__ \
.Touch(); \
return 0; \
}
#define WRAP(...) __VA_ARGS__
#define REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, \
precision_type__) \
REGISTER_ANAKIN_OP_CONVERTER_BASE( \
op_type__, \
::paddle::inference::anakin::Converter__<WRAP( \
::anakin::saber::NV, ::anakin::Precision::precision_type__)>, \
CUDA, ::anakin::saber::NV, precision_type__, \
::anakin::Precision::precision_type__)
#define REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, \
precision_type__) \
REGISTER_ANAKIN_OP_CONVERTER_BASE( \
op_type__, \
::paddle::inference::anakin::Converter__<WRAP( \
::anakin::saber::X86, ::anakin::Precision::precision_type__)>, \
CPU, ::anakin::saber::X86, precision_type__, \
::anakin::Precision::precision_type__)
#if defined(PADDLE_WITH_CUDA) && defined(ANAKIN_X86_PLACE)
#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8); \
REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CPU_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8)
#elif defined(PADDLE_WITH_CUDA)
#define REGISTER_ANAKIN_OP_CONVERTER(op_type__, Converter__) \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, FP32); \
REGISTER_CUDA_ANAKIN_OP_CONVERTER(op_type__, Converter__, INT8)
#endif
#define USE_ANAKIN_CONVERTER_BASE(op_type__, place_type__, precision_type__) \
extern int Touch_anakin_##op_type__##_##place_type__##_##precision_type__(); \
int use_converter_anakin_##op_type__##_##place_type__##_##precision_type__ \
UNUSED = \
Touch_anakin_##op_type__##_##place_type__##_##precision_type__();
#if defined(PADDLE_WITH_CUDA) && defined(ANAKIN_X86_PLACE)
#define USE_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, FP32) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, FP32)
#define USE_INT8_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, INT8) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CPU, INT8)
#elif defined(PADDLE_WITH_CUDA)
#define USE_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, FP32)
#define USE_INT8_ANAKIN_CONVERTER(op_type__) \
USE_ANAKIN_CONVERTER_BASE(op_type__, CUDA, INT8)
#endif
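// Typical usage (sketch, with illustrative op/class names): a converter
// translation unit ends with
//   REGISTER_ANAKIN_OP_CONVERTER(softmax, SoftMaxOpConverter);
// and code that needs that converter linked in declares
//   USE_ANAKIN_CONVERTER(softmax);
//   USE_INT8_ANAKIN_CONVERTER(softmax);
// which references the registration object's Touch() so the symbol is not
// dropped by the linker.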
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/pool2d.h"
#include <algorithm>
#include <string>
#include <vector>
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void Pool2dOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto x_name = op_desc.Input("X").front();
auto y_name = op_desc.Output("Out").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
bool global_pooling = boost::get<bool>(op_desc.GetAttr("global_pooling"));
std::string pool_type =
boost::get<std::string>(op_desc.GetAttr("pooling_type"));
std::vector<int> ksize =
boost::get<std::vector<int>>(op_desc.GetAttr("ksize"));
std::vector<int> strides =
boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
std::vector<int> paddings =
boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
bool ceil_mode = boost::get<bool>(op_desc.GetAttr("ceil_mode"));
std::string anakin_pool_type;
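  // Map the Paddle pooling type to anakin's: "max" -> MAX; "avg" -> AVG, or
  // AVGEXC (presumably "average excluding padding") when any padding is set.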
if (pool_type == "max") {
anakin_pool_type = "MAX";
} else if (pool_type == "avg") {
if (paddings[0] || paddings[1]) {
anakin_pool_type = "AVGEXC";
} else {
anakin_pool_type = "AVG";
}
} else {
    PADDLE_THROW("Unsupported pooling type in the anakin pool2d converter!");
}
this->engine_->AddOp(op_name, "Pooling", {x_name}, {y_name});
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "pool_size", ksize);
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "strides", strides);
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "padding", paddings);
this->engine_->AddOpAttr(op_name, "method", anakin_pool_type);
this->engine_->AddOpAttr(op_name, "global_pooling", global_pooling);
this->engine_->AddOpAttr(op_name, "cmp_out_shape_floor_as_conv", !ceil_mode);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(pool2d, Pool2dOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class Pool2dOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
Pool2dOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~Pool2dOpConverter() {}
private:
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/relu.h"
#include <algorithm>
#include <map>
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void ReluOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto input_name = op_desc.Input("X").front();
auto output_name = op_desc.Output("Out").front();
this->engine_->AddOp(op_name, "ReLU", {input_name}, {output_name});
  this->engine_->AddOpAttr(op_name, "alpha", static_cast<float>(0));
}
template <typename TargetT, ::anakin::Precision PrecisionT>
void LeakyReluOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto input_name = op_desc.Input("X").front();
auto output_name = op_desc.Output("Out").front();
float alpha = boost::get<float>(op_desc.GetAttr("alpha"));
this->engine_->AddOp(op_name, "ReLU", {input_name}, {output_name});
this->engine_->AddOpAttr(op_name, "alpha", alpha);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(relu, ReluOpConverter);
REGISTER_ANAKIN_OP_CONVERTER(leaky_relu, LeakyReluOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class ReluOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
ReluOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~ReluOpConverter() {}
};
template <typename TargetT, ::anakin::Precision PrecisionT>
class LeakyReluOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
LeakyReluOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~LeakyReluOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/reshape.h"
#include <vector>
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void ReshapeOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL);
auto input = op_desc.Input("X").front();
auto output = op_desc.Output("Out").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
this->engine_->AddOp(op_name, "Reshape", {input}, {output});
auto shape = boost::get<std::vector<int>>(op_desc.GetAttr("shape"));
if (shape.size() < 4) {
shape.insert(shape.end(), 4 - shape.size(), 1);
}
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dims", shape);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(reshape, ReshapeOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class ReshapeOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
ReshapeOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~ReshapeOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/roi_align.h"
#include <algorithm>
#include <map>
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void RoiAlignOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("ROIs").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto input_x_name = op_desc.Input("X").front();
auto input_rois_name = op_desc.Input("ROIs").front();
auto output_name = op_desc.Output("Out").front();
auto spatial_scale = boost::get<float>(op_desc.GetAttr("spatial_scale"));
auto pooled_height = boost::get<int>(op_desc.GetAttr("pooled_height"));
auto pooled_width = boost::get<int>(op_desc.GetAttr("pooled_width"));
auto sampling_ratio = boost::get<int>(op_desc.GetAttr("sampling_ratio"));
this->engine_->AddOp(op_name, "RoiAlign", {input_x_name, input_rois_name},
{output_name});
this->engine_->AddOpAttr(op_name, "spatial_scale", spatial_scale);
this->engine_->AddOpAttr(op_name, "pooled_height", pooled_height);
this->engine_->AddOpAttr(op_name, "pooled_width", pooled_width);
this->engine_->AddOpAttr(op_name, "sampling_ratio", sampling_ratio);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(roi_align, RoiAlignOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class RoiAlignOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
RoiAlignOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~RoiAlignOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/scale.h"
#include <algorithm>
#include <map>
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void ScaleOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto input_name = op_desc.Input("X").front();
auto output_name = op_desc.Output("Out").front();
float scale = boost::get<float>(op_desc.GetAttr("scale"));
float bias = boost::get<float>(op_desc.GetAttr("bias"));
bool bias_after_scale =
boost::get<bool>(op_desc.GetAttr("bias_after_scale"));
PADDLE_ENFORCE(bias_after_scale,
"The anakin scale layer only supports bias after scale now.");
this->engine_->AddOp(op_name, "Power", {input_name}, {output_name});
this->engine_->AddOpAttr(op_name, "shift", bias);
this->engine_->AddOpAttr(op_name, "scale", scale);
this->engine_->AddOpAttr(op_name, "power", static_cast<float>(1.0));
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(scale, ScaleOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class ScaleOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
ScaleOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~ScaleOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/shuffle_channel.h"
#include <algorithm>
#include <string>
#include <vector>
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void ShuffleChannelOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto input = op_desc.Input("X").front();
auto output = op_desc.Output("Out").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
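// shuffle_channel maps directly onto Anakin's ShuffleChannel layer; only the
// group attribute needs to be forwarded.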
this->engine_->AddOp(op_name, "ShuffleChannel", {input}, {output});
auto group = boost::get<int>(op_desc.GetAttr("group"));
this->engine_->AddOpAttr(op_name, "group", group);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(shuffle_channel, ShuffleChannelOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class ShuffleChannelOpConverter
: public AnakinOpConverter<TargetT, PrecisionT> {
public:
ShuffleChannelOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~ShuffleChannelOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/softmax.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void SoftMaxOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL);
auto input = op_desc.Input("X").front();
auto output = op_desc.Output("Out").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
auto input_var_desc = block_desc.FindVar(input);
PADDLE_ENFORCE(input_var_desc,
"Cant find %s variable When runing Anakin Softmax converter.",
input);
auto input_shape_in_fluid = input_var_desc->GetShape();
size_t input_dims = input_shape_in_fluid.size();
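// Anakin needs an explicit softmax axis; use the last dimension of the input
// shape recorded in the block desc.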
this->engine_->AddOp(op_name, "Softmax", {input}, {output});
this->engine_->AddOpAttr(op_name, "axis", static_cast<int>(input_dims - 1));
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(softmax, SoftMaxOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class SoftMaxOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
SoftMaxOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~SoftMaxOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/split.h"
#include <algorithm>
#include <vector>
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void SplitOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
auto input_name = op_desc.Input("X").front();
auto y_names = op_desc.Output("Out");
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
int axis = boost::get<int>(op_desc.GetAttr("axis"));
std::vector<int> output_lengths =
boost::get<std::vector<int>>(op_desc.GetAttr("sections"));
int split_num = output_lengths.size();
PADDLE_ENFORCE(split_num > 1,
"anakin split op converter: the split num should be > 1");
int num_sum = 0;
std::vector<int> slice_point;
for (int i = 0; i < split_num - 1; i++) {
num_sum += output_lengths[i];
slice_point.push_back(num_sum);
}
this->engine_->AddOp(op_name, "Slice", {input_name}, y_names);
this->engine_->AddOpAttr(op_name, "axis", axis);
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "slice_point",
slice_point);
// slice_dim is ignored by Anakin, so a fixed placeholder value is passed.
this->engine_->AddOpAttr(op_name, "slice_dim", 4);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(split, SplitOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class SplitOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
SplitOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~SplitOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/sum.h"
#include <algorithm>
#include <string>
#include <vector>
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void SumOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 2);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto input_names = op_desc.Input("X");
auto out_name = op_desc.Output("Out").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
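// sum over two inputs is expressed as Anakin's Eltwise layer configured as an
// element-wise Add with unit coefficients.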
std::vector<float> coeff = {1, 1};
std::string elementwise_type = "Add";
this->engine_->AddOp(op_name, "Eltwise", input_names, {out_name});
this->engine_->template AddOpAttr<PTuple<float>>(op_name, "coeff", coeff);
this->engine_->template AddOpAttr<std::string>(op_name, "type",
elementwise_type);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(sum, SumOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class SumOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
SumOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~SumOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/activation.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT>
static void test_activation_op(const std::string& op_type,
const platform::DeviceContext& context,
bool use_gpu) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
validator.DeclInputVar("act-X", {10, 6, 1, 1});
validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
framework::OpDesc desc;
desc.SetType(op_type);
desc.SetInput("X", {"act-X"});
desc.SetOutput("Out", {"act-Out"});
if (op_type == "swish") {
desc.SetAttr("beta", 1.0f);
}
if (op_type == "relu6") {
desc.SetAttr("threshold", 6.0f);
}
LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(5);
}
#ifdef PADDLE_WITH_CUDA
TEST(sigm_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_activation_op<::anakin::saber::NV>("sigmoid", ctx, true);
}
TEST(tanh_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_activation_op<::anakin::saber::NV>("tanh", ctx, true);
}
TEST(relu6_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_activation_op<::anakin::saber::NV>("relu6", ctx, true);
}
TEST(swish_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_activation_op<::anakin::saber::NV>("swish", ctx, true);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(sigmoid);
USE_OP(tanh);
USE_OP(relu6);
USE_OP(swish);
USE_ANAKIN_CONVERTER(sigmoid);
USE_ANAKIN_CONVERTER(tanh);
USE_ANAKIN_CONVERTER(relu6);
USE_ANAKIN_CONVERTER(swish);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/affine_channel.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT>
void test_affine_channel_op(const platform::DeviceContext& context,
bool use_gpu) {
// Scale and bias are declared as parameters rather than ordinary inputs.
std::unordered_set<std::string> parameters({"scale", "bias"});
framework::Scope scope;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
validator.DeclInputVar("x", {1, 3, 5, 2});
validator.DeclOutputVar("out", {1, 3, 5, 2});
validator.DeclParamVar("scale", {3});
validator.DeclParamVar("bias", {3});
// Prepare Op descriptions.
framework::OpDesc desc;
desc.SetType("affine_channel");
desc.SetInput("X", {"x"});
desc.SetInput("Bias", {"bias"});
desc.SetInput("Scale", {"scale"});
desc.SetOutput("Out", {"out"});
// Layout must be explicitly specified here as NCHW.
desc.SetAttr("data_layout", std::string("NCHW"));
validator.SetOp(*desc.Proto());
validator.Execute(1);
}
#ifdef PADDLE_WITH_CUDA
TEST(affine_channel_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_affine_channel_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
TEST(affine_channel_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_affine_channel_op<::anakin::saber::X86>(ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(affine_channel);
USE_ANAKIN_CONVERTER(affine_channel);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT>
void test_batchnorm_op(const platform::DeviceContext& context, bool use_gpu) {
std::unordered_set<std::string> parameters(
{"batch_norm_scale", "batch_norm_bias", "batch_norm_mean",
"batch_norm_variance"});
framework::Scope scope;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
std::vector<int> param_shape{2};
validator.DeclInputVar("batch_norm_X", {1, 2, 5, 5});
validator.DeclParamVar("batch_norm_scale", param_shape);
validator.DeclParamVar("batch_norm_bias", param_shape);
validator.DeclParamVar("batch_norm_mean", param_shape);
validator.DeclParamVar("batch_norm_variance", param_shape);
validator.DeclOutputVar("batch_norm_Y", {1, 2, 5, 5});
validator.DeclOutputVar("batch_norm_save_mean", param_shape);
validator.DeclOutputVar("batch_norm_save_variance", param_shape);
// Prepare Op description
framework::OpDesc desc;
desc.SetType("batch_norm");
desc.SetInput("X", {"batch_norm_X"});
desc.SetInput("Scale", {"batch_norm_scale"});
desc.SetInput("Bias", {"batch_norm_bias"});
desc.SetInput("Mean", {"batch_norm_mean"});
desc.SetInput("Variance", {"batch_norm_variance"});
desc.SetOutput("Y", {"batch_norm_Y"});
desc.SetOutput("MeanOut", {"batch_norm_mean"});
desc.SetOutput("VarianceOut", {"batch_norm_variance"});
desc.SetOutput("SavedMean", {"batch_norm_save_mean"});
desc.SetOutput("SavedVariance", {"batch_norm_save_variance"});
float eps = 1e-5f;
bool is_test = true;
desc.SetAttr("epsilon", eps);
desc.SetAttr("is_test", is_test);
validator.SetOp(*desc.Proto());
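// The mean/variance related outputs are training-time statistics, so they are
// excluded from the numeric comparison below.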
std::unordered_set<std::string> neglected_output = {
"batch_norm_save_mean", "batch_norm_save_variance", "batch_norm_mean",
"batch_norm_variance"};
validator.Execute(1, neglected_output);
}
#ifdef PADDLE_WITH_CUDA
TEST(batch_norm_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_batchnorm_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
TEST(batch_norm_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_batchnorm_op<::anakin::saber::X86>(ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(batch_norm);
USE_ANAKIN_CONVERTER(batch_norm);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/concat.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT>
void test_concat_op(const platform::DeviceContext& context, bool use_gpu) {
std::unordered_set<std::string> parameters({""});
framework::Scope scope;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
validator.DeclInputVar("concat_x1", {1, 2, 1, 1});
validator.DeclInputVar("concat_x2", {1, 3, 1, 1});
validator.DeclInputVar("concat_x3", {1, 1, 1, 1});
validator.DeclOutputVar("concat_out", {1, 6, 1, 1});
// Prepare Op description
framework::OpDesc desc;
desc.SetType("concat");
desc.SetInput("X", {"concat_x1", "concat_x2", "concat_x3"});
desc.SetOutput("Out", {"concat_out"});
int axis = 1;
desc.SetAttr("axis", axis);
validator.SetOp(*desc.Proto());
validator.Execute(1);
}
#ifdef PADDLE_WITH_CUDA
TEST(concat_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_concat_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
TEST(concat_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_concat_op<::anakin::saber::X86>(ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(concat);
USE_ANAKIN_CONVERTER(concat);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/conv2d.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT>
void test_conv2d_op(const platform::DeviceContext& context, bool use_gpu) {
std::unordered_set<std::string> parameters({"conv2d-Y"});
framework::Scope scope;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
validator.DeclInputVar("conv2d-X", {1, 3, 3, 3});
validator.DeclParamVar("conv2d-Y", {4, 3, 1, 1});
validator.DeclOutputVar("conv2d-Out", {1, 4, 3, 3});
// Prepare Op description
framework::OpDesc desc;
desc.SetType("conv2d");
desc.SetInput("Input", {"conv2d-X"});
desc.SetInput("Filter", {"conv2d-Y"});
desc.SetOutput("Output", {"conv2d-Out"});
const std::vector<int> strides({1, 1});
const std::vector<int> paddings({0, 0});
const std::vector<int> dilations({1, 1});
const int groups = 1;
desc.SetAttr("strides", strides);
desc.SetAttr("paddings", paddings);
desc.SetAttr("dilations", dilations);
desc.SetAttr("groups", groups);
validator.SetOp(*desc.Proto());
validator.Execute(3);
}
#ifdef PADDLE_WITH_CUDA
TEST(conv2d_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_conv2d_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
TEST(conv2d_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_conv2d_op<::anakin::saber::X86>(ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(conv2d);
USE_ANAKIN_CONVERTER(conv2d);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/dropout.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT>
void test_dropout_op(const platform::DeviceContext& context, bool use_gpu) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
validator.DeclInputVar("x", {1, 1, 2, 2});
validator.DeclOutputVar("out", {1, 1, 2, 2});
validator.DeclOutputVar("mask", {1, 1, 2, 2});
// Prepare Op description
framework::OpDesc desc;
desc.SetType("dropout");
desc.SetInput("X", {"x"});
desc.SetOutput("Out", {"out"});
desc.SetOutput("Mask", {"mask"});
float dropout_prob = 0.5;
desc.SetAttr("dropout_prob", dropout_prob);
desc.SetAttr("is_test", true);
validator.SetOp(*desc.Proto());
std::unordered_set<std::string> neglected_output = {"mask"};
validator.Execute(1, neglected_output);
}
#ifdef PADDLE_WITH_CUDA
TEST(dropout_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_dropout_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
TEST(dropout_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_dropout_op<::anakin::saber::X86>(ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(dropout);
USE_ANAKIN_CONVERTER(dropout);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/elementwise.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT>
static void test_elementwise_op(const std::string& op_type,
const platform::DeviceContext& context,
bool use_gpu) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
validator.DeclInputVar("x", {1, 1, 2, 2});
validator.DeclInputVar("y", {1, 1, 2, 2});
validator.DeclOutputVar("out", {1, 1, 2, 2});
// Prepare Op description
framework::OpDesc desc;
desc.SetType(op_type);
desc.SetInput("X", {"x"});
desc.SetInput("Y", {"y"});
desc.SetOutput("Out", {"out"});
int axis = -1;
desc.SetAttr("axis", axis);
validator.SetOp(*desc.Proto());
validator.Execute(1);
}
#ifdef PADDLE_WITH_CUDA
TEST(elementwise_op, native_add_gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_elementwise_op<::anakin::saber::NV>("elementwise_add", ctx, true);
}
TEST(elementwise_op, native_mul_gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_elementwise_op<::anakin::saber::NV>("elementwise_mul", ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
TEST(elementwise_op, native_add_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_elementwise_op<::anakin::saber::X86>("elementwise_add", ctx, false);
}
TEST(elementwise_op, native_mul_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_elementwise_op<::anakin::saber::X86>("elementwise_mul", ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(elementwise_add);
USE_OP(elementwise_mul);
USE_ANAKIN_CONVERTER(elementwise_add);
USE_ANAKIN_CONVERTER(elementwise_mul);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT>
void test_mul_op(const platform::DeviceContext& context, bool use_gpu) {
std::unordered_set<std::string> parameters({"mul_y"});
framework::Scope scope;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
validator.DeclInputVar("mul_x", {1, 1, 2, 2});
validator.DeclParamVar("mul_y", {4, 2});
validator.DeclOutputVar("mul_out", {1, 2});
// Prepare Op description
framework::OpDesc desc;
desc.SetType("mul");
desc.SetInput("X", {"mul_x"});
desc.SetInput("Y", {"mul_y"});
desc.SetOutput("Out", {"mul_out"});
validator.SetOp(*desc.Proto());
validator.Execute(10);
}
#ifdef PADDLE_WITH_CUDA
TEST(mul_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_mul_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
TEST(mul_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_mul_op<::anakin::saber::X86>(ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(mul);
USE_ANAKIN_CONVERTER(fc);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT>
void test_flatten_op(const platform::DeviceContext& context, bool use_gpu) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
validator.DeclInputVar("flatten-X", {3, 10, 10, 4});
validator.DeclOutputVar("flatten-Out", {3, 400, 1, 1});
framework::OpDesc desc;
desc.SetType("flatten");
desc.SetInput("X", {"flatten-X"});
desc.SetOutput("Out", {"flatten-Out"});
desc.SetAttr("axis", 1);
LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(5);
}
#ifdef PADDLE_WITH_CUDA
TEST(flatten_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_flatten_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
TEST(flatten_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_flatten_op<::anakin::saber::X86>(ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(reshape);
USE_OP_ITSELF(flatten);
USE_ANAKIN_CONVERTER(flatten);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/im2sequence.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
TEST(im2sequence_op, native) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation validator(parameters, &scope);
std::vector<int> kernels = {6, 1};
std::vector<int> strides = {1, 1};
std::vector<int> paddings = {0, 0, 0, 0};
validator.DeclInputVar("x", {1, 1, 2, 2});
validator.DeclOutputVar("out", {1, 1 * kernels[0] * kernels[1]});
// Prepare Op description
framework::OpDesc desc;
desc.SetType("im2sequence");
desc.SetInput("X", {"x"});
desc.SetOutput("Out", {"out"});
desc.SetAttr("kernels", kernels);
desc.SetAttr("strides", strides);
desc.SetAttr("paddings", paddings);
validator.SetOp(*desc.Proto());
validator.Execute(1);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(im2sequence);
USE_ANAKIN_CONVERTER(im2sequence);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT>
void test_pool2d(const platform::DeviceContext& context, bool use_gpu,
bool global_pooling, bool ceil_mode,
std::string pool_type = "max") {
framework::Scope scope;
std::unordered_set<std::string> parameters;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
// The declared shapes are N * C * H * W, with the batch size fixed to 1 here.
validator.DeclInputVar("pool2d_x", {1, 3, 6, 7});
if (global_pooling)
validator.DeclOutputVar("pool2d_out", {1, 3, 1, 1});
else if (ceil_mode)
validator.DeclOutputVar("pool2d_out", {1, 3, 3, 4});
else
validator.DeclOutputVar("pool2d_out", {1, 3, 3, 3});
// Prepare Op description
framework::OpDesc desc;
desc.SetType("pool2d");
desc.SetInput("X", {"pool2d_x"});
desc.SetOutput("Out", {"pool2d_out"});
std::vector<int> ksize({2, 2});
std::vector<int> strides({2, 2});
std::vector<int> paddings({0, 0});
std::string pooling_t = pool_type;
desc.SetAttr("pooling_type", pooling_t);
desc.SetAttr("ksize", ksize);
desc.SetAttr("strides", strides);
desc.SetAttr("paddings", paddings);
desc.SetAttr("global_pooling", global_pooling);
desc.SetAttr("ceil_mode", ceil_mode);
LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(1);
}
#ifdef PADDLE_WITH_CUDA
TEST(Pool2dOpConverter, normal) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_pool2d<::anakin::saber::NV>(ctx, true, false, false);
}
TEST(Pool2dOpConverter, test_global_pooling) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_pool2d<::anakin::saber::NV>(ctx, true, true, false);
}
TEST(Pool2dOpConverter, max_ceil_test) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_pool2d<::anakin::saber::NV>(ctx, true, false, true);
}
TEST(Pool2dOpConverter, avg_ceil_test) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_pool2d<::anakin::saber::NV>(ctx, true, false, true, "avg");
}
#endif
#ifdef ANAKIN_X86_PLACE
TEST(Pool2dOpConverter, normal_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_pool2d<::anakin::saber::X86>(ctx, false, false, false);
}
TEST(Pool2dOpConverter, test_global_pooling_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_pool2d<::anakin::saber::X86>(ctx, false, true, false);
}
TEST(Pool2dOpConverter, max_ceil_test_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_pool2d<::anakin::saber::X86>(ctx, false, false, true);
}
TEST(Pool2dOpConverter, avg_ceil_test_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_pool2d<::anakin::saber::X86>(ctx, false, false, true, "avg");
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(pool2d);
USE_ANAKIN_CONVERTER(pool2d);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/relu.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT>
static void test_activation_op(const std::string& op_type,
const platform::DeviceContext& context,
bool use_gpu) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
validator.DeclInputVar("act-X", {10, 6, 1, 1});
validator.DeclOutputVar("act-Out", {10, 6, 1, 1});
framework::OpDesc desc;
desc.SetType(op_type);
desc.SetInput("X", {"act-X"});
desc.SetOutput("Out", {"act-Out"});
if (op_type == "leaky_relu") {
desc.SetAttr("alpha", 0.1f);
}
LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(5);
}
#ifdef PADDLE_WITH_CUDA
TEST(relu_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_activation_op<::anakin::saber::NV>("relu", ctx, true);
}
TEST(leaky_relu_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_activation_op<::anakin::saber::NV>("leaky_relu", ctx, true);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(relu);
USE_OP(leaky_relu);
USE_ANAKIN_CONVERTER(relu);
USE_ANAKIN_CONVERTER(leaky_relu);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT>
void test_reshape1_op(const platform::DeviceContext& context, bool use_gpu) {
framework::Scope scope;
std::unordered_set<std::string> parameters;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
// validator.DeclInputVar("reshape-X", {2, 3, 3, 1});
// validator.DeclOutputVar("reshape-Out", {3, 2, 1, 3});
validator.DeclInputVar("reshape-X", {1, 2, 4, 1});
validator.DeclOutputVar("reshape-Out", {1, 8, 1, 1});
framework::OpDesc desc;
desc.SetType("reshape");
desc.SetInput("X", {"reshape-X"});
desc.SetOutput("Out", {"reshape-Out"});
// desc.SetAttr("shape", std::vector<int>({3, 2, 1, 3}));
desc.SetAttr("shape", std::vector<int>({1, 8, 1, 1}));
LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(1);
}
template <typename TargetT>
void test_reshape2_op(const platform::DeviceContext& context, bool use_gpu) {
framework::Scope scope;
std::unordered_set<std::string> parameters;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
validator.DeclInputVar("reshape-X", {1, 2, 4});
validator.DeclOutputVar("reshape-Out", {1, 4, 2});
framework::OpDesc desc;
desc.SetType("reshape");
desc.SetInput("X", {"reshape-X"});
desc.SetOutput("Out", {"reshape-Out"});
// desc.SetAttr("shape", std::vector<int>({3, 2, 1, 3}));
desc.SetAttr("shape", std::vector<int>({0, -1, 2}));
LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(1);
}
#ifdef PADDLE_WITH_CUDA
TEST(reshape1_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_reshape1_op<::anakin::saber::NV>(ctx, true);
}
TEST(reshape2_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_reshape2_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
TEST(reshape1_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_reshape1_op<::anakin::saber::X86>(ctx, false);
}
TEST(reshape2_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_reshape2_op<::anakin::saber::X86>(ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(reshape);
USE_ANAKIN_CONVERTER(reshape);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT>
void test_softmax_op(const platform::DeviceContext& context, bool use_gpu) {
framework::Scope scope;
std::unordered_set<std::string> parameters;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
validator.DeclInputVar("softmax-X", {1, 10, 2});
validator.DeclOutputVar("softmax-Out", {1, 10, 2});
framework::OpDesc desc;
desc.SetType("softmax");
desc.SetInput("X", {"softmax-X"});
desc.SetOutput("Out", {"softmax-Out"});
LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(1);
}
#ifdef PADDLE_WITH_CUDA
TEST(softmax_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_softmax_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
TEST(softmax_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_softmax_op<::anakin::saber::X86>(ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(softmax);
USE_ANAKIN_CONVERTER(softmax);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/split.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, int Axis>
void AnakinSliceTest(const platform::DeviceContext &context, bool use_gpu,
const std::vector<int> &in_shape,
const std::vector<int> &sections) {
std::unordered_set<std::string> parameters({""});
framework::Scope scope;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
validator.DeclInputVar("split_input", in_shape);
std::vector<std::string> output_vars;
for (size_t i = 0; i < sections.size(); ++i) {
auto out_shape = in_shape;
out_shape[Axis] = sections[i];
std::string output_name = "split_out" + std::to_string(i);
validator.DeclOutputVar(output_name, out_shape);
output_vars.push_back(output_name);
}
// Prepare Op description
framework::OpDesc desc;
desc.SetType("split");
desc.SetInput("X", {"split_input"});
desc.SetOutput("Out", output_vars);
desc.SetAttr("axis", Axis);
desc.SetAttr("num", 0);
desc.SetAttr("sections", sections);
validator.SetOp(*desc.Proto());
validator.Execute(1);
}
// batch = 0, axis = 1, same shape
TEST(split_op, test_same_shape_axis1_batch1) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
AnakinSliceTest<::anakin::saber::NV, 1>(ctx, true, {1, 4, 2, 2}, {2, 2});
}
// batch = 0, axis = 1, different shape
TEST(split_op, test_different_shape_axis1_batch1) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
AnakinSliceTest<::anakin::saber::NV, 1>(ctx, true, {1, 3, 2, 2}, {2, 1});
}
// batch = 0, axis = 2, same shape
TEST(split_op, test_same_shape_axis2_batch1) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
AnakinSliceTest<::anakin::saber::NV, 2>(ctx, true, {1, 3, 4, 2}, {2, 2});
}
// batch = 0, axis = 2, different shape
TEST(split_op, test_different_shape_axis2_batch1) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
AnakinSliceTest<::anakin::saber::NV, 2>(ctx, true, {1, 3, 3, 2}, {2, 1});
}
// batch = 0, axis = 3, same shape
TEST(split_op, test_same_shape_axis3_batch1) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
AnakinSliceTest<::anakin::saber::NV, 3>(ctx, true, {1, 3, 2, 4}, {2, 2});
}
// batch = 0, axis = 3, different shape
TEST(split_op, test_different_shape_axis3_batch1) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
AnakinSliceTest<::anakin::saber::NV, 3>(ctx, true, {1, 3, 2, 3}, {2, 1});
}
#ifdef ANAKIN_X86_PLACE
TEST(split_op, test_different_shape_axis1_batch1_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
AnakinSliceTest<::anakin::saber::X86, 1>(ctx, false, {1, 3, 2, 3}, {2, 1});
}
TEST(split_op, test_different_shape_axis2_batch1_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
AnakinSliceTest<::anakin::saber::X86, 2>(ctx, false, {1, 3, 4, 2}, {2, 2});
}
TEST(split_op, test_different_shape_axis3_batch1_cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
AnakinSliceTest<::anakin::saber::X86, 3>(ctx, false, {1, 3, 2, 4}, {2, 2});
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(split);
USE_ANAKIN_CONVERTER(split);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/sum.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
#include "paddle/fluid/operators/sum_op.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT>
static void test_sum_op(const platform::DeviceContext& context, bool use_gpu) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
validator.DeclInputVar("sum_x1", {1, 2, 1, 2});
validator.DeclInputVar("sum_x2", {1, 2, 1, 2});
validator.DeclOutputVar("sum_out", {1, 2, 1, 2});
// Prepare Op description
framework::OpDesc desc;
desc.SetType("sum");
desc.SetInput("X", {"sum_x1", "sum_x2"});
desc.SetOutput("Out", {"sum_out"});
validator.SetOp(*desc.Proto());
validator.Execute(1);
}
#ifdef PADDLE_WITH_CUDA
TEST(sum_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_sum_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
TEST(sum_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_sum_op<::anakin::saber::X86>(ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(sum);
USE_ANAKIN_CONVERTER(sum);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/convert/ut_helper.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT>
void test_transpose1_op(const platform::DeviceContext& context, bool use_gpu) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
validator.DeclInputVar("transpose-X", {2, 3, 4, 5});
validator.DeclOutputVar("transpose-Out", {4, 2, 5, 3});
// Prepare Op description
framework::OpDesc desc;
desc.SetType("transpose");
desc.SetInput("X", {"transpose-X"});
desc.SetOutput("Out", {"transpose-Out"});
desc.SetAttr("axis", std::vector<int>({2, 0, 3, 1}));
LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(3);
}
template <typename TargetT>
void test_transpose2_op(const platform::DeviceContext& context, bool use_gpu) {
std::unordered_set<std::string> parameters;
framework::Scope scope;
AnakinConvertValidation<TargetT, ::anakin::Precision::FP32> validator(
parameters, &scope, context, use_gpu);
validator.DeclInputVar("transpose-X", {3, 4, 5});
validator.DeclOutputVar("transpose-Out", {3, 5, 4});
// Prepare Op description
framework::OpDesc desc;
desc.SetType("transpose");
desc.SetInput("X", {"transpose-X"});
desc.SetOutput("Out", {"transpose-Out"});
desc.SetAttr("axis", std::vector<int>({0, 2, 1}));
LOG(INFO) << "set OP";
validator.SetOp(*desc.Proto());
LOG(INFO) << "execute";
validator.Execute(1);
}
#ifdef PADDLE_WITH_CUDA
TEST(transpose1_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_transpose1_op<::anakin::saber::NV>(ctx, true);
}
TEST(transpose2_op, gpu) {
platform::CUDAPlace gpu_place(0);
platform::CUDADeviceContext ctx(gpu_place);
test_transpose2_op<::anakin::saber::NV>(ctx, true);
}
#endif
#ifdef ANAKIN_X86_PLACE
TEST(transpose1_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_transpose1_op<::anakin::saber::X86>(ctx, false);
}
TEST(transpose2_op, cpu) {
platform::CPUPlace cpu_place;
platform::CPUDeviceContext ctx(cpu_place);
test_transpose2_op<::anakin::saber::X86>(ctx, false);
}
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
USE_OP(transpose);
USE_ANAKIN_CONVERTER(transpose);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/convert/transpose.h"
#include <algorithm>
#include <string>
#include <vector>
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
void TransposeOpConverter<TargetT, PrecisionT>::operator()(
const framework::proto::OpDesc &op, const framework::BlockDesc &block_desc,
const framework::Scope &scope, bool test_mode) {
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
auto input = op_desc.Input("X").front();
auto output = op_desc.Output("Out").front();
auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
this->engine_->AddOp(op_name, "Permute", {input}, {output});
auto axis = boost::get<std::vector<int>>(op_desc.GetAttr("axis"));
size_t axis_size = axis.size();
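// The loop below pads the permutation to rank 4 by appending the remaining
// axes in order, e.g. axis {0, 2, 1} becomes {0, 2, 1, 3}, so the Permute op
// always receives a 4-D "dims" attribute.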
while (axis.size() < 4) {
axis.push_back(axis_size);
axis_size += 1;
}
this->engine_->template AddOpAttr<PTuple<int>>(op_name, "dims", axis);
}
} // namespace anakin
} // namespace inference
} // namespace paddle
REGISTER_ANAKIN_OP_CONVERTER(transpose, TransposeOpConverter);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionT>
class TransposeOpConverter : public AnakinOpConverter<TargetT, PrecisionT> {
public:
TransposeOpConverter() = default;
virtual void operator()(const framework::proto::OpDesc &op,
const framework::BlockDesc &block_desc,
const framework::Scope &scope,
bool test_mode) override;
virtual ~TransposeOpConverter() {}
};
} // namespace anakin
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <gtest/gtest.h>
#include <map>
#include <memory>
#include <random>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/platform/enforce.h"
using anakin::Precision;
namespace paddle {
namespace inference {
namespace anakin {
/*
* Get a random float value in [low, high].
*/
float random(float low, float high) {
static std::random_device rd;
static std::mt19937 mt(rd());
std::uniform_real_distribution<double> dist(low, high);
return dist(mt);
}
void RandomizeTensor(framework::LoDTensor* tensor,
const platform::Place& place) {
auto dims = tensor->dims();
size_t num_elements = analysis::AccuDims(dims, dims.size());
PADDLE_ENFORCE_GT(num_elements, 0);
platform::CPUPlace cpu_place;
framework::LoDTensor temp_tensor;
temp_tensor.Resize(dims);
auto* temp_data = temp_tensor.mutable_data<float>(cpu_place);
for (size_t i = 0; i < num_elements; i++) {
*(temp_data + i) = random(0., 1.);
}
TensorCopySync(temp_tensor, place, tensor);
}
/*
* Helps to validate the correctness between a Fluid Op and the corresponding
* anakin layer.
*/
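/*
* A minimal usage sketch (illustrative only; it mirrors the converter unit
* tests earlier in this change set, and the tensor names are hypothetical):
*
*   std::unordered_set<std::string> parameters;
*   framework::Scope scope;
*   AnakinConvertValidation<::anakin::saber::NV, ::anakin::Precision::FP32>
*       validator(parameters, &scope, ctx, true);
*   validator.DeclInputVar("x", {1, 2, 1, 2});
*   validator.DeclOutputVar("out", {1, 2, 1, 2});
*   validator.SetOp(*op_desc.Proto());  // converts the op and builds the engine
*   validator.Execute(1);               // runs Fluid and Anakin, then compares
*/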
template <typename TargetT, ::anakin::Precision PrecisionT>
class AnakinConvertValidation {
using AnakinNvEngineT = AnakinEngine<TargetT, PrecisionT>;
public:
AnakinConvertValidation() = delete;
AnakinConvertValidation(const std::unordered_set<std::string>& parameters,
framework::Scope* scope,
const platform::DeviceContext& ctx,
bool use_gpu = true)
: parameters_(parameters), scope_(scope), ctx_(ctx), use_gpu_(use_gpu) {
engine_.reset(new AnakinEngine<TargetT, PrecisionT>(true));
}
// Declare a Variable as input with random initialization.
void DeclInputVar(const std::string& name,
const std::vector<int> tensor_dims) {
DeclVar(name, tensor_dims);
// should declare anakin input here.
}
void DeclParamVar(const std::string& name, const std::vector<int> dim_vec) {
DeclVar(name, dim_vec);
}
void DeclOutputVar(const std::string& name, const std::vector<int> dim_vec) {
DeclVar(name, dim_vec);
// should declare anakin output here.
}
void DeclVar(const std::string& name, const std::vector<int> dim_vec) {
auto* x = scope_->Var(name);
auto* x_tensor = x->GetMutable<framework::LoDTensor>();
x_tensor->Resize(framework::make_ddim(dim_vec));
RandomizeTensor(x_tensor, ctx_.GetPlace());
std::vector<int64_t> dim_vec_int64;
for (auto& ele : dim_vec) {
dim_vec_int64.push_back(static_cast<int64_t>(ele));
}
// Add var_desc to block_desc
auto* block_desc = program_desc_.MutableBlock(framework::kRootBlockIndex);
auto* var_desc = block_desc->Var(name);
var_desc->SetShape(dim_vec_int64);
}
void SetOp(const framework::proto::OpDesc& desc) {
op_ = framework::OpRegistry::CreateOp(desc);
op_desc_.reset(new framework::OpDesc(desc, nullptr));
// should init anakin engine here.
auto& block_desc = program_desc_.Block(framework::kRootBlockIndex);
Singleton<AnakinOpConverter<TargetT, PrecisionT>>::Global().ConvertOp(
desc, block_desc, parameters_, *scope_, engine_.get(),
true /*test_mode*/);
engine_->Freeze();
std::map<std::string, std::vector<int>> temp_max_input_shape;
for (const auto& input : op_desc_->InputArgumentNames()) {
if (parameters_.count(input)) continue;
auto& t = inference::analysis::GetFromScope<framework::LoDTensor>(*scope_,
input);
auto t_shape = framework::vectorize<int>(t.dims());
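// Pad the shape to 4-D for the anakin side, e.g. {3, 4} becomes {3, 4, 1, 1}.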
while (t_shape.size() < 4) {
t_shape.push_back(1);
}
engine_->SetInputShape(input, t_shape);
temp_max_input_shape[input] = t_shape;
}
engine_->SetMaxInputShape(temp_max_input_shape);
engine_->Optimize();
engine_->InitNet();
}
// We use the set 'neglected_output' here because, for some Ops like batch
// norm, some of the outputs specified in the op desc are only used during
// training, so we should neglect those outputs during inference.
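// For example (output names here are illustrative, not taken from this PR),
// a batch_norm check might be run as:
//   validator.Execute(1, {"MeanOut", "VarianceOut", "SavedMean",
//                         "SavedVariance"});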
void Execute(int batch_size,
std::unordered_set<std::string> neglected_output = {}) {
// Execute Fluid Op
op_->Run(*scope_, ctx_.GetPlace());
std::map<std::string, framework::LoDTensor*> inputs;
for (const auto& input : op_desc_->InputArgumentNames()) {
if (parameters_.count(input)) continue;
auto* var = scope_->FindVar(input);
auto tensor = var->GetMutable<framework::LoDTensor>();
inputs.insert({input, tensor});
}
std::map<std::string, framework::LoDTensor*> outputs;
std::vector<std::vector<float>> fluid_outputs;
for (const auto& output : op_desc_->OutputArgumentNames()) {
if (neglected_output.count(output)) continue;
std::vector<float> fluid_out;
auto* var = scope_->FindVar(output);
auto tensor = var->GetMutable<framework::LoDTensor>();
framework::TensorToVector(*tensor, ctx_, &fluid_out);
fluid_outputs.push_back(fluid_out);
outputs.insert({output, tensor});
}
if (!use_gpu_) {
engine_->Execute(inputs, outputs);
} else {
cudaStream_t stream;
PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream), 0);
engine_->Execute(inputs, outputs, stream);
}
int i_output = 0;
for (const auto& output : op_desc_->OutputArgumentNames()) {
if (neglected_output.count(output)) continue;
std::vector<float> anakin_out;
auto* var = scope_->FindVar(output);
auto tensor = var->GetMutable<framework::LoDTensor>();
framework::TensorToVector(*tensor, ctx_, &anakin_out);
size_t anakin_out_size = anakin_out.size();
auto fluid_out = fluid_outputs[i_output++];
for (size_t i = 0; i < anakin_out_size; i++) {
EXPECT_LT(std::abs(fluid_out[i] - anakin_out[i]), 1e-3);
}
}
}
private:
std::unique_ptr<AnakinNvEngineT> engine_{nullptr};
std::unique_ptr<framework::OperatorBase> op_;
std::unique_ptr<framework::OpDesc> op_desc_;
framework::ProgramDesc program_desc_;
const std::unordered_set<std::string>& parameters_;
framework::Scope* scope_;
const platform::DeviceContext& ctx_;
bool use_gpu_{true};
};
template class AnakinConvertValidation<::anakin::saber::NV,
::anakin::Precision::FP32>;
template class AnakinConvertValidation<::anakin::saber::NV,
::anakin::Precision::INT8>;
#ifdef ANAKIN_X86_PLACE
template class AnakinConvertValidation<::anakin::saber::X86,
::anakin::Precision::FP32>;
template class AnakinConvertValidation<::anakin::saber::X86,
::anakin::Precision::INT8>;
#endif
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/engine.h"
#include <algorithm>
#include <cstring>
#include <functional>
#include <map>
#include <numeric>
#include <utility>
#include "paddle/fluid/framework/ddim.h"
using anakin::Precision;
using anakin::OpRunType;
using paddle::framework::LoDTensor;
template <typename T, Precision P, OpRunType O>
using AnakinNetT = anakin::Net<T, P, O>;
template <typename T, Precision P>
using AnakinGraphT = anakin::graph::Graph<T, P>;
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
extern std::once_flag
AnakinEngine<TargetT, PrecisionType, RunType>::init_anakin_;
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
AnakinEngine<TargetT, PrecisionType, RunType>::AnakinEngine(
bool need_summary, int device, int max_batch_size,
std::map<std::string, std::vector<int>> max_input_shape,
std::vector<std::string> program_inputs, bool auto_config_layout)
: device_(device),
max_batch_size_(max_batch_size),
max_input_shape_(max_input_shape),
program_inputs_(program_inputs),
auto_config_layout_(auto_config_layout) {
::anakin::TargetWrapper<TargetT>::set_device(device_);
std::call_once(init_anakin_,
[this]() { ::anakin::Env<TargetT>::env_init(); });
graph_.reset(new AnakinGraphT<TargetT, PrecisionType>());
net_.reset(new AnakinNetT<TargetT, PrecisionType, RunType>(need_summary));
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
AnakinEngine<TargetT, PrecisionType, RunType>::~AnakinEngine() {}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::SetInputShape(
const std::string &name, std::vector<int> shape) {
graph_->AddOpAttr<::anakin::PTuple<int>>(name, "input_shape",
std::move(shape));
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::InitNet() {
net_->init(*graph_, auto_config_layout_);
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::AddOp(
const std::string &name, const std::string &type,
const std::vector<std::string> &inputs,
const std::vector<std::string> &outputs) {
PADDLE_ENFORCE(graph_->AddOp(name, type, inputs, outputs), "Add operation.");
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::BindInput(
const std::map<std::string, framework::LoDTensor *> &inputs) {
#ifdef PADDLE_WITH_CUDA
cudaDeviceSynchronize();
#endif
for (const auto &input : inputs) {
auto *tensor = input.second;
auto *data = tensor->data<float>();
auto fluid_input_shape = framework::vectorize<int>(tensor->dims());
while (fluid_input_shape.size() < 4) {
fluid_input_shape.push_back(1);
}
auto *anakin_input = net_->get_in(input.first);
std::vector<int> max_input_shape = max_input_shape_[input.first];
int max_shape_sum =
std::accumulate(max_input_shape.begin(), max_input_shape.end(), 1,
std::multiplies<int>());
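// If the incoming tensor exceeds the configured max input shape, the net has
// to be rebuilt with the real shape; this is only permitted for inputs that
// are not program feed inputs.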
if (tensor->numel() > max_shape_sum) {
PADDLE_ENFORCE(std::find(program_inputs_.begin(), program_inputs_.end(),
input.first) == program_inputs_.end(),
"The anakin input max shape should be greater than"
" or equal to the real input shape, Please set the max "
"input shape using EnableAnakinEngine");
VLOG(3) << "Anakin Net will be reset because of the inputs out of range: "
<< input.first;
graph_->Reshape(input.first, fluid_input_shape);
net_.reset(new AnakinNetT<TargetT, PrecisionType, RunType>(true));
net_->init(*graph_);
anakin_input = net_->get_in(input.first);
}
anakin_input->reshape(fluid_input_shape);
::anakin::saber::Tensor<TargetT> tmp_anakin_tensor(data, TargetT(), device_,
fluid_input_shape);
anakin_input->copy_from(tmp_anakin_tensor);
}
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
const std::map<std::string, framework::LoDTensor *> &inputs,
const std::map<std::string, framework::LoDTensor *> &outputs) {
BindInput(inputs);
net_->prediction();
for (const auto &output : outputs) {
platform::CPUPlace cpu_place;
auto *tensor = output.second;
auto *anakin_output = net_->get_out(output.first);
auto *anakin_data = anakin_output->data();
auto anakin_output_shape = anakin_output->valid_shape();
tensor->Resize(framework::make_ddim(anakin_output_shape));
auto *fluid_data = tensor->mutable_data<float>(cpu_place);
memory::Copy(cpu_place, static_cast<void *>(fluid_data), cpu_place,
static_cast<void *>(anakin_data),
tensor->numel() * sizeof(float));
}
}
#ifdef PADDLE_WITH_CUDA
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
const std::map<std::string, framework::LoDTensor *> &inputs,
const std::map<std::string, framework::LoDTensor *> &outputs,
cudaStream_t stream) {
BindInput(inputs);
net_->prediction();
cudaDeviceSynchronize();
for (const auto &output : outputs) {
platform::CUDAPlace gpu_place(device_);
auto *tensor = output.second;
auto *anakin_output = net_->get_out(output.first);
auto *anakin_data = anakin_output->data();
auto anakin_output_shape = anakin_output->valid_shape();
tensor->Resize(framework::make_ddim(anakin_output_shape));
auto *fluid_data = tensor->mutable_data<float>(gpu_place);
memory::Copy(gpu_place, static_cast<void *>(fluid_data), gpu_place,
static_cast<void *>(anakin_data),
tensor->numel() * sizeof(float), stream);
}
cudaDeviceSynchronize();
}
#endif
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::Freeze() {
PADDLE_ENFORCE(graph_->Freeze(), "Freeze anakin subgraph.");
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::Optimize() {
PADDLE_ENFORCE(graph_->Optimize(), "Graph optimization.");
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
void AnakinEngine<TargetT, PrecisionType, RunType>::RegistBlock(
::anakin::PBlock<TargetT> *block_p) {
PADDLE_ENFORCE(graph_->RegistBlock(block_p), "Block register.");
}
template <typename TargetT, Precision PrecisionType, OpRunType RunType>
std::unique_ptr<AnakinEngine<TargetT, PrecisionType, RunType>>
AnakinEngine<TargetT, PrecisionType, RunType>::Clone() {
auto *engine = new AnakinEngine();
engine->net_ = std::move(net_->Clone());
return std::unique_ptr<AnakinEngine>(engine);
}
#ifdef PADDLE_WITH_CUDA
template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::FP32>;
template class AnakinEngineManager<::anakin::saber::NV,
::anakin::Precision::FP32>;
template class AnakinEngine<::anakin::saber::NV, ::anakin::Precision::INT8>;
template class AnakinEngineManager<::anakin::saber::NV,
::anakin::Precision::INT8>;
#endif
#ifdef ANAKIN_X86_PLACE
template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>;
template class AnakinEngineManager<::anakin::saber::X86,
::anakin::Precision::FP32>;
template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::INT8>;
template class AnakinEngineManager<::anakin::saber::X86,
::anakin::Precision::INT8>;
#endif
// template class AnakinEngine<::anakin::saber::X86, ::anakin::Precision::FP32>;
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"
#ifdef EXIT // NOLINT
#undef EXIT // NOLINT
#endif // NOLINT
#include "framework/core/net/net.h"
#include "framework/core/types.h"
#include "framework/graph/graph.h"
#include "framework/graph/graph_global_mem.h"
#include "saber/saber_types.h"
using anakin::Precision;
namespace anakin {
template <typename, Precision, OpRunType>
class Net;
namespace graph {
template <typename, Precision>
class Graph;
} // namespace graph
} // namespace anakin
namespace paddle {
namespace inference {
namespace anakin {
template <typename TargetT, ::anakin::Precision PrecisionType,
::anakin::OpRunType RunType = ::anakin::OpRunType::ASYNC>
class AnakinEngine {
using NetT = ::anakin::Net<TargetT, PrecisionType, RunType>;
using GraphT = ::anakin::graph::Graph<TargetT, PrecisionType>;
public:
explicit AnakinEngine(
bool need_summary = false, int device = 0, int max_batch_size = 1,
std::map<std::string, std::vector<int>> max_input_shape = {},
std::vector<std::string> program_inputs = {},
bool auto_config_layout = false);
~AnakinEngine();
void InitNet();
void SetInputShape(const std::string &name, std::vector<int> shape);
void AddOp(const std::string &name, const std::string &type,
const std::vector<std::string> &inputs,
const std::vector<std::string> &outputs);
template <typename T>
void AddOpAttr(const std::string &op_name, const std::string &attr_name,
const T &attr_value) {
PADDLE_ENFORCE(graph_->AddOpAttr(op_name, attr_name, attr_value),
"Add operation's attribution.");
}
NetT *Net() { return net_.get(); }
GraphT *Graph() { return graph_.get(); }
std::unique_ptr<AnakinEngine> Clone();
const std::map<std::string, std::vector<int>> &GetMaxInputShape() {
return max_input_shape_;
}
void SetMaxInputShape(std::map<std::string, std::vector<int>> shape) {
max_input_shape_ = shape;
}
const std::vector<std::string> &GetScalableInputs() {
return program_inputs_;
}
void SetScalableInputs(std::vector<std::string> program_inputs) {
program_inputs_ = program_inputs;
}
int GetMaxBatchSize() { return max_batch_size_; }
void Freeze();
void Optimize();
void RegistBlock(::anakin::PBlock<TargetT> *block_p);
void Save(std::string path) { graph_->save(path); }
bool IsInit() { return initialized_; }
int GetDevice() { return device_; }
void AddTensorScale(const std::string &tensor_name, float scale) {
tensor_scales_[tensor_name] = scale;
}
std::unordered_map<std::string, float> GetTensorScales() {
return tensor_scales_;
}
void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
const std::map<std::string, framework::LoDTensor *> &outputs);
#ifdef PADDLE_WITH_CUDA
void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
const std::map<std::string, framework::LoDTensor *> &outputs,
cudaStream_t stream);
#endif
private:
void BindInput(const std::map<std::string, framework::LoDTensor *> &inputs);
private:
bool initialized_{false};
int device_;
int max_batch_size_;
std::map<std::string, std::vector<int>> max_input_shape_;
std::vector<std::string> program_inputs_;
std::unique_ptr<GraphT> graph_;
std::unique_ptr<NetT> net_;
static std::once_flag init_anakin_;
std::unordered_map<std::string, float> tensor_scales_;
// Always false in GPU mode but true in most CPU cases.
bool auto_config_layout_;
};
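/*
* A rough sketch of the expected call order, based on the unit test and the
* converter ut_helper in this change set (not an authoritative contract):
*
*   AnakinEngine<NV, Precision::FP32> engine(true);
*   engine.AddOp("op1", "Dense", {"x"}, {"y"});
*   engine.AddOpAttr("op1", "out_dim", 2);
*   engine.Freeze();
*   engine.SetInputShape("x", {1, 1, 1, 1});
*   engine.Optimize();
*   engine.InitNet();
*   engine.Execute(inputs, outputs, stream);  // GPU overload
*/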
template <typename TargetT, ::anakin::Precision PrecisionType>
class AnakinEngineManager {
using AnakinEngineT = AnakinEngine<TargetT, PrecisionType>;
public:
bool HasEngine(const std::string &name) const {
if (engines_.count(name) == 0) return false;
return engines_.at(name).get() != nullptr;
}
AnakinEngineT *Get(const std::string &name) const {
return engines_.at(name).get();
}
AnakinEngineT *Create(bool need_summary, int device, int max_batch_size,
std::map<std::string, std::vector<int>> max_input_shape,
std::vector<std::string> program_inputs,
bool auto_config_layout, std::string engine_name) {
std::unique_lock<std::mutex> lk(mut_);
auto *p = new AnakinEngine<TargetT, PrecisionType>(
need_summary, device, max_batch_size, max_input_shape, program_inputs,
auto_config_layout);
engines_[engine_name].reset(p);
return p;
}
void DeleteALL() {
for (auto &item : engines_) {
item.second.reset(nullptr);
}
}
private:
std::unordered_map<std::string, std::unique_ptr<AnakinEngineT>> engines_;
std::mutex mut_;
};
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/anakin/op_teller.h"
namespace paddle {
namespace inference {
namespace anakin {
// Tell whether an op is supported simply by looking up its op_type.
struct SimpleOpTypeSetTeller : public Teller {
SimpleOpTypeSetTeller() {
teller_set.insert("mul");
teller_set.insert("fc");
teller_set.insert("conv2d_fusion");
teller_set.insert("split");
teller_set.insert("relu");
teller_set.insert("pool2d");
teller_set.insert("elementwise_add");
teller_set.insert("elementwise_mul");
teller_set.insert("concat");
teller_set.insert("tanh");
teller_set.insert("conv2d");
teller_set.insert("batch_norm");
teller_set.insert("softmax");
teller_set.insert("flatten2");
teller_set.insert("reshape2");
teller_set.insert("transpose2");
teller_set.insert("density_prior_box");
teller_set.insert("detection_out");
teller_set.insert("dropout");
teller_set.insert("sigmoid");
teller_set.insert("sum");
teller_set.insert("depthwise_conv2d");
teller_set.insert("prior_box");
teller_set.insert("leaky_relu");
teller_set.insert("affine_channel");
teller_set.insert("relu6");
teller_set.insert("swish");
teller_set.insert("shuffle_channel");
}
bool operator()(const std::string& op_type,
const framework::OpDesc& desc) override {
return teller_set.count(op_type);
}
private:
std::unordered_set<std::string> teller_set;
};
bool OpTeller::Tell(const std::string& op_type, const framework::OpDesc& desc) {
for (auto& teller : tellers_) {
if (op_type == "pool2d" || op_type == "conv2d" ||
op_type == "depthwise_conv2d" || op_type == "conv2d_transpose") {
std::vector<int> paddings =
boost::get<std::vector<int>>(desc.GetAttr("paddings"));
if (paddings.size() > 2) return false;
}
if ((*teller)(op_type, desc)) return true;
}
return false;
}
OpTeller::OpTeller() { tellers_.emplace_back(new SimpleOpTypeSetTeller); }
} // namespace anakin
} // namespace inference
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/op_desc.h"
namespace paddle {
namespace inference {
namespace anakin {
/*
* Single Op teller definition.
* One can override this and define a more complex tell logic, considering more
* issues such as op_desc.
*/
struct Teller {
virtual bool operator()(const std::string& op_type,
const framework::OpDesc& desc) = 0;
virtual ~Teller() = default;
};
/*
* A real example:
*
* struct SomeTeller : public Teller {
* bool operator()(const std::string& op_type,
* const framework::OpDesc& desc) override {
* return op_type == "fc" && desc.Inputs().size() == 2;
* }
*};
*/
/*
* class OpTeller helps to tell whether a fluid
* operator can be transformed to an Anakin layer.
*/
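/*
* Typical call site (as used by the anakin subgraph pass in this change):
*   if (OpTeller::Global().Tell(node->Op()->Type(), *node->Op())) { ... }
*/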
class OpTeller {
public:
static OpTeller& Global() {
static std::unique_ptr<OpTeller> x(new OpTeller);
return *x;
}
bool Tell(const std::string& op_type, const framework::OpDesc& desc);
private:
OpTeller();
private:
std::vector<std::unique_ptr<Teller>> tellers_;
};
} // namespace anakin
} // namespace inference
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <map>
#include "paddle/fluid/inference/anakin/engine.h"
using anakin::AK_FLOAT;
using anakin::Precision;
using anakin::saber::NV;
using anakin::saber::Shape;
using anakin::PBlock;
using anakin::PTuple;
namespace paddle {
namespace inference {
namespace anakin {
class TestAnakinEngine : public ::testing::Test {
protected:
void SetUp() override;
void TearDown() override {}
protected:
using AnakinNvEngineT = AnakinEngine<NV, Precision::FP32>;
std::unique_ptr<AnakinNvEngineT> engine_{nullptr};
};
void TestAnakinEngine::SetUp() {
engine_.reset(new AnakinEngine<NV, Precision::FP32>(true));
}
TEST_F(TestAnakinEngine, Execute) {
engine_->AddOp("op1", "Dense", {"x"}, {"y"});
engine_->AddOpAttr("op1", "out_dim", 2);
engine_->AddOpAttr("op1", "bias_term", false);
engine_->AddOpAttr("op1", "axis", 1);
std::vector<int> shape = {1, 1, 1, 2};
Shape tmp_shape(shape);
PBlock<NV> *weight1 = new PBlock<NV>(tmp_shape, AK_FLOAT);
engine_->RegistBlock(weight1);
float *cpu_data = static_cast<float *>(weight1->h_tensor().mutable_data());
cpu_data[0] = 2.;
weight1->d_tensor().set_shape(tmp_shape);
weight1->d_tensor().copy_from(weight1->h_tensor());
engine_->AddOpAttr("op1", "weight_1", *weight1);
engine_->Freeze();
// PTuple<int> input_shape = {1};
// engine_->AddOpAttr("x", "input_shape", input_shape);
engine_->SetInputShape("x", {1, 1, 1, 1});
engine_->Optimize();
engine_->InitNet();
framework::LoDTensor x;
framework::LoDTensor y;
x.Resize({1, 1, 1, 1});
y.Resize({1, 1, 1, 2});
auto *x_data = x.mutable_data<float>(platform::CUDAPlace());
float x_data_cpu[] = {1.};
cudaMemcpy(x_data, x_data_cpu, sizeof(float), cudaMemcpyHostToDevice);
std::map<std::string, framework::LoDTensor *> inputs = {{"x", &x}};
auto *y_data = y.mutable_data<float>(platform::CUDAPlace());
std::map<std::string, framework::LoDTensor *> outputs = {{"y", &y}};
cudaStream_t stream;
cudaStreamCreate(&stream);
engine_->Execute(inputs, outputs, stream);
auto *y_data_gpu = y_data;
float y_data_cpu[2];
cudaMemcpy(y_data_cpu, y_data_gpu, sizeof(float) * 2, cudaMemcpyDeviceToHost);
LOG(INFO) << "output value: " << y_data_cpu[0] << ", " << y_data_cpu[1];
}
} // namespace anakin
} // namespace inference
} // namespace paddle
...@@ -59,7 +59,6 @@ struct Argument {
using unique_ptr_t = std::unique_ptr<void, std::function<void(void*)>>;
using fusion_statis_t = std::unordered_map<std::string, int>;
using anakin_max_shape_t = std::map<std::string, std::vector<int>>;
bool Has(const std::string& key) const { return valid_fields_.count(key); }
// If we set the model using config.SetModelBuffer,
...@@ -184,19 +183,6 @@ struct Argument {
bool);
DECL_ARGUMENT_FIELD(tensorrt_use_calib_mode, TensorRtUseCalibMode, bool);
DECL_ARGUMENT_FIELD(anakin_max_input_shape, AnakinMaxInputShape,
anakin_max_shape_t);
DECL_ARGUMENT_FIELD(anakin_max_batch_size, AnakinMaxBatchSize, int);
DECL_ARGUMENT_FIELD(anakin_min_subgraph_size, AnakinMinSubgraphSize, int);
DECL_ARGUMENT_FIELD(anakin_precision_mode, AnakinPrecisionMode,
AnalysisConfig::Precision);
DECL_ARGUMENT_FIELD(anakin_auto_config_layout, AnakinAutoConfigLayout, bool);
DECL_ARGUMENT_FIELD(use_anakin, UseAnakin, bool);
DECL_ARGUMENT_FIELD(anakin_passes_filter, AnakinPassesFilter,
std::vector<std::string>);
DECL_ARGUMENT_FIELD(anakin_ops_filter, AnakinOpsFilter,
std::vector<std::string>);
DECL_ARGUMENT_FIELD(lite_passes_filter, LitePassesFilter,
std::vector<std::string>);
DECL_ARGUMENT_FIELD(lite_ops_filter, LiteOpsFilter, std::vector<std::string>);
...
...@@ -139,24 +139,6 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("enable_int8", new bool(enable_int8));
pass->Set("use_gpu", new bool(argument->use_gpu()));
}
if (pass_name == "anakin_subgraph_pass") {
pass->Set("program",
new framework::ProgramDesc *(&argument->main_program()));
pass->Set("use_gpu", new bool(argument->use_gpu()));
pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
pass->Set("model_from_memory", new bool(argument->model_from_memory()));
pass->Set("predictor_id", new int(argument->predictor_id()));
pass->Set("max_input_shape", new std::map<std::string, std::vector<int>>(
argument->anakin_max_input_shape()));
pass->Set("max_batch_size", new int(argument->anakin_max_batch_size()));
bool enable_int8 =
argument->anakin_precision_mode() == AnalysisConfig::Precision::kInt8;
pass->Set("enable_int8", new bool(enable_int8));
pass->Set("anakin_ops_filter",
new std::vector<std::string>(argument->anakin_ops_filter()));
pass->Set("auto_config_layout",
new bool(argument->anakin_auto_config_layout()));
}
disable_logs_ = argument->disable_logs();
if (pass_name == "fc_fuse_pass") {
pass->Set("use_gpu", new bool(argument->use_gpu()));
...
...@@ -12,18 +12,6 @@ if (WITH_GPU AND TENSORRT_FOUND)
set(INFER_IR_PASSES ${INFER_IR_PASSES} tensorrt_subgraph_pass CACHE INTERNAL "")
endif()
if (ANAKIN_SUBGRAPH)
cc_library(anakin_subgraph_pass SRCS anakin_subgraph_pass.cc DEPS subgraph_util anakin_op_teller)
set(analysis_deps ${analysis_deps}
subgraph_util anakin_subgraph_pass
CACHE INTERNAL "")
set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h)
file(APPEND ${pass_file} "USE_PASS(anakin_subgraph_pass);\n")
set(INFER_IR_PASSES ${INFER_IR_PASSES} anakin_subgraph_pass CACHE INTERNAL "")
endif()
if (WITH_LITE)
cc_library(lite_subgraph_pass SRCS lite_subgraph_pass.cc DEPS ${analysis_deps} subgraph_util lite_op_teller)
set(analysis_deps ${analysis_deps} subgraph_util lite_subgraph_pass CACHE INTERNAL "")
...
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/subgraph_detector.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/op_teller.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h"
#include "paddle/fluid/string/pretty_log.h"
namespace paddle {
namespace inference {
namespace analysis {
using framework::ir::Node;
void analysis::AnakinSubgraphPass::ApplyImpl(
framework::ir::Graph *graph) const {
framework::ir::FusePassBase::Init("anakin_subgraph_pass", graph);
auto &anakin_ops_filter = Get<std::vector<std::string>>("anakin_ops_filter");
auto teller = [&anakin_ops_filter](const framework::ir::Node *node) {
if (!node->IsOp() || !node->Op())
return false;
else if (std::find(anakin_ops_filter.begin(), anakin_ops_filter.end(),
node->Op()->Type()) != anakin_ops_filter.end())
return false;
return anakin::OpTeller::Global().Tell(node->Op()->Type(), *node->Op());
};
framework::ir::SubGraphFuser fuser(graph, teller, 6 /* min_subgraph_size */);
fuser();
std::vector<std::string> graph_param_names =
ExtractParameters(graph->Nodes());
// Those parameters already exist in anakin and should not have another copy
// in fluid.
std::vector<std::string> repetitive_params;
for (auto *node : graph->Nodes()) {
if (node->IsOp() && !framework::ir::Agent(node).subgraph()->empty()) {
CreateAnakinOp(node, graph, graph_param_names, &repetitive_params);
std::unordered_set<const Node *> nodes2remove(
framework::ir::Agent(node).subgraph()->begin(),
framework::ir::Agent(node).subgraph()->end());
framework::ir::GraphSafeRemoveNodes(graph, nodes2remove);
}
}
std::unordered_set<const Node *> nodes2remove;
for (auto *node : graph->Nodes()) {
if (node->IsOp() && framework::ir::Agent(node).deleted()) {
nodes2remove.insert(node);
}
}
framework::ir::GraphSafeRemoveNodes(graph, nodes2remove);
graph->Set(framework::ir::kRepetitiveParamAttr,
new std::vector<std::string>(repetitive_params));
}
std::string GenerateAnakinEngineKey(const std::set<std::string> &engine_inputs,
const std::set<std::string> &engine_outputs,
std::string id) {
std::string engine_hash_key = "";
for (auto name : engine_inputs) {
engine_hash_key += name;
}
for (auto name : engine_outputs) {
engine_hash_key += name;
}
engine_hash_key += id;
auto engine_key = std::to_string(std::hash<std::string>()(engine_hash_key));
return engine_key;
}
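// For instance, inputs {"a", "b"}, outputs {"c"} and id "1" hash the
// concatenated string "abc1" into the returned engine key.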
void AnakinSubgraphPass::CreateAnakinOp(
framework::ir::Node *node, framework::ir::Graph *graph,
const std::vector<std::string> &graph_params,
std::vector<std::string> *repetitive_params) const {
auto *op_desc = node->Op();
auto &subgraph = *framework::ir::Agent(node).subgraph();
PADDLE_ENFORCE(!subgraph.empty());
framework::ProgramDesc *program_desc =
Get<framework::ProgramDesc *>("program");
// Add a new block for the anakin_engine op.
const framework::BlockDesc &main_block =
program_desc->Block(framework::kRootBlockIndex);
// const framework::BlockDesc& main_block = program_desc->Block(0);
framework::BlockDesc *new_block = program_desc->AppendBlock(main_block);
// A fake block desc.
framework::proto::BlockDesc block_proto;
framework::BlockDesc block_desc(nullptr, &block_proto);
block_desc.Proto()->set_parent_idx(-1);
block_desc.Proto()->set_idx(0);
string::PrettyLogDetail("--- detect a sub-graph with %d nodes",
subgraph.size());
for (auto *node : subgraph) {
auto *new_block_op = new_block->AppendOp();
auto *op = block_desc.AppendOp();
*new_block_op->Proto() = *node->Op()->Proto();
*op->Proto() = *node->Op()->Proto();
}
// Then, we will use the input_names_with_id and output_names_with_id to
// generate the engine key.
// So we use set instead of unordered_set here to ensure that the engine key
// is unique.
std::set<std::string> input_names;
std::set<std::string> input_names_with_id;
std::vector<std::string> params;
for (auto *x : node->inputs) {
input_names.insert(x->Name());
input_names_with_id.insert(x->Name() + std::to_string(x->id()));
if (std::count(graph_params.begin(), graph_params.end(), x->Name()) > 0) {
params.push_back(x->Name());
}
}
std::copy(params.begin(), params.end(),
std::back_inserter(*repetitive_params));
op_desc->SetInput(
"Xs", std::vector<std::string>(input_names.begin(), input_names.end()));
std::set<std::string> output_names;
std::set<std::string> output_names_with_id;
for (auto *x : node->outputs) {
output_names.insert(x->Name());
output_names_with_id.insert(x->Name() + std::to_string(x->id()));
}
op_desc->SetOutput(
"Ys", std::vector<std::string>(output_names.begin(), output_names.end()));
op_desc->SetType("anakin_engine");
std::unordered_map<std::string, std::string> output_name_map;
std::unordered_map<std::string, framework::ir::Node *> graph_var_map;
for (framework::ir::Node *node : graph->Nodes()) {
if (node->IsVar() && node->Var()) {
graph_var_map[node->Name()] = node;
}
}
auto &subgraph_nodes = *framework::ir::Agent(node).subgraph();
// The following procedure is used to rename all the intermediate
// variables and the output variables of the subgraph.
RenameAndGetOutputs(subgraph_nodes, &block_desc, input_names_with_id,
&output_names_with_id, &output_names, &output_name_map,
graph_var_map, false);
// When the anakin engine runs at the end of the operation,
// output_mapping helps us copy the data from the renamed engine tensors
// back to the fluid Tensor.
std::vector<std::string> output_mapping;
for (auto name : output_names) {
PADDLE_ENFORCE(output_name_map.count(name) != 0);
output_mapping.push_back(output_name_map[name]);
}
PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(),
"the block has no var-desc");
PADDLE_ENFORCE(!output_mapping.empty());
op_desc->SetBlockAttr("sub_block", new_block);
SetAttr(op_desc->Proto(), "subgraph",
block_desc.Proto()->SerializeAsString());
// Set attrs
SetAttr(op_desc->Proto(), "parameters", params);
SetAttr(op_desc->Proto(), "output_name_mapping", output_mapping);
int predictor_id = Get<int>("predictor_id");
auto engine_key = GenerateAnakinEngineKey(
input_names_with_id, output_names_with_id, std::to_string(predictor_id));
SetAttr(op_desc->Proto(), "engine_key", engine_key);
auto max_input_shape =
Get<std::map<std::string, std::vector<int>>>("max_input_shape");
auto program_inputs = program_desc->GetFeedTargetNames();
bool use_gpu = Get<bool>("use_gpu");
SetAttr(op_desc->Proto(), "use_gpu", use_gpu);
bool enable_int8 = Get<bool>("enable_int8");
SetAttr(op_desc->Proto(), "enable_int8", enable_int8);
if (enable_int8) {
CreateAnakinEngine<::anakin::Precision::INT8>(&block_desc, params,
input_names, output_mapping,
program_inputs, engine_key);
} else {
CreateAnakinEngine<::anakin::Precision::FP32>(&block_desc, params,
input_names, output_mapping,
program_inputs, engine_key);
}
}
template <::anakin::Precision PrecisionT>
void AnakinSubgraphPass::CreateAnakinEngine(
framework::BlockDesc *block_desc, const std::vector<std::string> &params,
const std::set<std::string> &input_names,
const std::vector<std::string> &output_mapping,
const std::vector<std::string> &program_inputs,
const std::string &engine_key) const {
framework::BlockDesc block_desc_temp(nullptr, block_desc->Proto());
bool use_gpu = Get<bool>("use_gpu");
auto max_batch_size = Get<int>("max_batch_size");
auto max_input_shape =
Get<std::map<std::string, std::vector<int>>>("max_input_shape");
if (use_gpu) {
#ifdef PADDLE_WITH_CUDA
inference::Singleton<
anakin::AnakinEngineManager<::anakin::saber::NV, PrecisionT>>::Global()
.Create(true, Get<int>("gpu_device_id"), max_batch_size,
max_input_shape, program_inputs, false, engine_key);
#endif
} else {
#ifdef ANAKIN_X86_PLACE
bool auto_config_layout = Get<bool>("auto_config_layout");
inference::Singleton<
anakin::AnakinEngineManager<::anakin::saber::X86, PrecisionT>>::Global()
.Create(true, Get<int>("gpu_device_id"), max_batch_size,
max_input_shape, program_inputs, auto_config_layout,
engine_key);
#endif
}
auto *scope = param_scope();
std::unordered_set<std::string> param_set(params.begin(), params.end());
if (use_gpu) {
#ifdef PADDLE_WITH_CUDA
auto *anakin_engine =
inference::Singleton<inference::anakin::AnakinEngineManager<
::anakin::saber::NV, PrecisionT>>::Global()
.Get(engine_key);
inference::Singleton<inference::anakin::AnakinOpConverter<
::anakin::saber::NV, PrecisionT>>::Global()
.ConvertBlockToAnakinEngine(
&block_desc_temp, scope,
std::vector<std::string>(input_names.begin(), input_names.end()),
param_set, output_mapping, anakin_engine);
#endif
} else {
#ifdef ANAKIN_X86_PLACE
auto *anakin_engine =
inference::Singleton<inference::anakin::AnakinEngineManager<
::anakin::saber::X86, PrecisionT>>::Global()
.Get(engine_key);
inference::Singleton<inference::anakin::AnakinOpConverter<
::anakin::saber::X86, PrecisionT>>::Global()
.ConvertBlockToAnakinEngine(
&block_desc_temp, scope,
std::vector<std::string>(input_names.begin(), input_names.end()),
param_set, output_mapping, anakin_engine);
#endif
}
}
} // namespace analysis
} // namespace inference
} // namespace paddle
REGISTER_PASS(anakin_subgraph_pass,
paddle::inference::analysis::AnakinSubgraphPass);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <paddle/fluid/framework/ir/fuse_pass_base.h>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h"
using anakin::Precision;
using anakin::saber::NV;
namespace paddle {
namespace inference {
namespace analysis {
class AnakinSubgraphPass : public framework::ir::FusePassBase {
public:
void ApplyImpl(framework::ir::Graph *graph) const override;
private:
void CreateAnakinOp(framework::ir::Node *x, framework::ir::Graph *graph,
const std::vector<std::string> &graph_params,
std::vector<std::string> *repetitive_params) const;
void CleanIntermediateOutputs(framework::ir::Node *node);
template <::anakin::Precision PrecisionT>
void CreateAnakinEngine(framework::BlockDesc *block_desc,
const std::vector<std::string> &params,
const std::set<std::string> &input_names,
const std::vector<std::string> &output_mapping,
const std::vector<std::string> &program_inputs,
const std::string &engine_key) const;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
...@@ -43,10 +43,6 @@ if(WITH_GPU AND TENSORRT_FOUND)
set(inference_deps ${inference_deps} tensorrt_engine tensorrt_converter)
endif()
if (ANAKIN_SUBGRAPH)
set(inference_deps ${inference_deps} anakin_op_converter anakin_engine)
endif()
if(WITH_NGRAPH)
set(inference_deps ${inference_deps} ngraph)
endif()
...@@ -64,17 +60,3 @@ if(WITH_TESTING)
endif()
cc_test(test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_predictor benchmark ${inference_deps}
ARGS --dirname=${WORD2VEC_MODEL_DIR})
if(ANAKIN_FOUND)
# Do not turn warnings into errors.
set_source_files_properties(api.cc api_anakin_engine.cc PROPERTIES COMPILE_FLAGS "-Wno-error")
cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS boost xxhash framework_proto eigen3)
target_link_libraries(inference_anakin_api anakin anakin_saber_common)
cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS boost xxhash framework_proto eigen3)
target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common)
function(anakin_target target_name)
target_compile_options(${target_name} BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
endfunction()
anakin_target(inference_anakin_api)
anakin_target(inference_anakin_api_shared)
endif()
...@@ -22,7 +22,6 @@
namespace paddle {
extern const std::vector<std::string> kTRTSubgraphPasses;
extern const std::vector<std::string> kAnakinSubgraphPasses;
extern const std::vector<std::string> kLiteSubgraphPasses;
PassStrategy *AnalysisConfig::pass_builder() const {
...@@ -120,15 +119,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(use_mkldnn_quantizer_);
CP_MEMBER(mkldnn_quantizer_config_);
CP_MEMBER(use_anakin_);
CP_MEMBER(anakin_max_batchsize_);
CP_MEMBER(anakin_max_input_shape_);
CP_MEMBER(anakin_min_subgraph_size_);
CP_MEMBER(anakin_precision_mode_);
CP_MEMBER(anakin_auto_config_layout_);
CP_MEMBER(anakin_passes_filter_);
CP_MEMBER(anakin_ops_filter_);
CP_MEMBER(use_lite_);
CP_MEMBER(lite_precision_mode_);
CP_MEMBER(lite_passes_filter_);
...@@ -338,25 +328,6 @@ void AnalysisConfig::Update() {
pass_builder()->AppendAnalysisPass("memory_optimize_pass");
}
if (use_anakin_) {
PADDLE_ENFORCE(!use_tensorrt_,
"Anakin sub-graph and TensorRT sub-graph are not allowed to "
"run at the same time!");
if (use_gpu_) {
LOG(INFO) << "Run Anakin GPU mode";
} else {
LOG(INFO) << "Run Anakin CPU mode";
}
pass_builder()->ClearPasses();
for (const auto &pass : kAnakinSubgraphPasses) {
if (std::find(anakin_passes_filter_.begin(), anakin_passes_filter_.end(),
pass) == anakin_passes_filter_.end()) {
pass_builder()->AppendPass(pass);
}
}
}
if (use_lite_) {
#ifndef PADDLE_WITH_LITE
LOG(WARNING) << "You tried to enable the lite subgraph "
...@@ -413,10 +384,9 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << specify_input_name_;
ss << cpu_math_library_num_threads_;
ss << use_anakin_;
ss << anakin_min_subgraph_size_;
ss << use_lite_;
return ss.str();
}
...@@ -490,22 +460,6 @@ void AnalysisConfig::DisableGlogInfo() {
Update();
}
void AnalysisConfig::EnableAnakinEngine(
int max_batch_size, std::map<std::string, std::vector<int>> max_input_shape,
int min_subgraph_size, AnalysisConfig::Precision precision_mode,
bool auto_config_layout, std::vector<std::string> passes_filter,
std::vector<std::string> ops_filter) {
anakin_max_batchsize_ = max_batch_size;
anakin_max_input_shape_ = max_input_shape;
anakin_min_subgraph_size_ = min_subgraph_size;
anakin_passes_filter_ = passes_filter;
anakin_ops_filter_ = ops_filter;
use_anakin_ = true;
anakin_precision_mode_ = precision_mode;
anakin_auto_config_layout_ = auto_config_layout;
Update();
}
void AnalysisConfig::EnableLiteEngine(
AnalysisConfig::Precision precision_mode,
const std::vector<std::string> &passes_filter,
...
...@@ -50,10 +50,6 @@
#include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h"
#endif
#if PADDLE_WITH_ANAKIN
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#endif
namespace paddle {
using inference::Singleton;
...@@ -390,7 +386,6 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetEnableMemoryOptim(config_.enable_memory_optim());
argument_.SetModelFromMemory(config_.model_from_memory_);
// Analyze inference_program
argument_.SetUseAnakin(config_.anakin_engine_enabled());
argument_.SetPredictorID(predictor_id_);
argument_.SetOptimCacheDir(config_.opt_cache_dir_);
if (!config_.model_dir().empty()) {
...@@ -417,17 +412,6 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetTensorRtUseCalibMode(config_.trt_use_calib_mode_);
}
if (config_.anakin_engine_enabled()) {
argument_.SetAnakinMaxBatchSize(config_.anakin_max_batchsize_);
argument_.SetAnakinMaxInputShape(config_.anakin_max_input_shape_);
argument_.SetAnakinMinSubgraphSize(config_.anakin_min_subgraph_size_);
argument_.SetAnakinPrecisionMode(config_.anakin_precision_mode_);
argument_.SetAnakinAutoConfigLayout(config_.anakin_auto_config_layout_);
argument_.SetAnakinPassesFilter(config_.anakin_passes_filter_);
argument_.SetAnakinOpsFilter(config_.anakin_ops_filter_);
LOG(INFO) << "Anakin subgraph engine is enabled";
}
if (config_.lite_engine_enabled()) {
argument_.SetLitePrecisionMode(config_.lite_precision_mode_);
argument_.SetLitePassesFilter(config_.lite_passes_filter_);
...@@ -950,33 +934,3 @@ USE_TRT_CONVERTER(layer_norm);
USE_TRT_CONVERTER(gelu);
USE_TRT_CONVERTER(multihead_matmul);
#endif
#if PADDLE_WITH_ANAKIN
USE_ANAKIN_CONVERTER(mul);
USE_ANAKIN_CONVERTER(fc);
USE_ANAKIN_CONVERTER(conv2d);
USE_ANAKIN_CONVERTER(conv2d_fusion);
USE_ANAKIN_CONVERTER(concat);
USE_ANAKIN_CONVERTER(split);
USE_ANAKIN_CONVERTER(relu);
USE_ANAKIN_CONVERTER(sigmoid);
USE_ANAKIN_CONVERTER(tanh);
USE_ANAKIN_CONVERTER(pool2d);
USE_ANAKIN_CONVERTER(elementwise_add);
USE_ANAKIN_CONVERTER(elementwise_mul);
USE_ANAKIN_CONVERTER(batch_norm);
USE_ANAKIN_CONVERTER(flatten);
USE_ANAKIN_CONVERTER(reshape);
USE_ANAKIN_CONVERTER(transpose);
USE_ANAKIN_CONVERTER(softmax);
USE_ANAKIN_CONVERTER(detection_out);
USE_ANAKIN_CONVERTER(density_prior_box);
USE_ANAKIN_CONVERTER(dropout);
USE_ANAKIN_CONVERTER(sum);
USE_ANAKIN_CONVERTER(prior_box);
USE_ANAKIN_CONVERTER(leaky_relu);
USE_ANAKIN_CONVERTER(affine_channel);
USE_ANAKIN_CONVERTER(relu6);
USE_ANAKIN_CONVERTER(swish);
USE_ANAKIN_CONVERTER(shuffle_channel);
#endif
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <map>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/inference/api/api_anakin_engine.h"
#include "paddle/fluid/inference/api/paddle_api.h"
#include "framework/core/net/net.h"
#include "framework/operators/ops.h"
#include "saber/funcs/timer.h"
namespace paddle {
using paddle::contrib::AnakinConfig;
template <typename T, Precision P, OpRunType R>
extern std::mutex PaddleInferenceAnakinPredictor<T, P, R>::mutex_;
template <typename T, Precision P, OpRunType R>
extern std::once_flag PaddleInferenceAnakinPredictor<T, P, R>::init_anakin_;
template <typename T, Precision P, OpRunType R>
void PaddleInferenceAnakinPredictor<T, P, R>::InitEnv() {
std::call_once(this->init_anakin_, [this]() {
anakin::Env<T>::env_init(this->config_.max_stream);
});
anakin::TargetWrapper<T>::set_device(this->config_.device_id);
}
template <typename T, Precision P, OpRunType R>
void PaddleInferenceAnakinPredictor<T, P, R>::InitNet() {
std::unique_lock<std::mutex> lock(this->mutex_);
delete this->executor_p_;
this->executor_p_ = new anakin::Net<T, P, R>(*this->graph_p_, true);
}
template <typename T, Precision P, OpRunType R>
void PaddleInferenceAnakinPredictor<T, P, R>::SetContext() {
this->ctx_p_ = std::make_shared<anakin::Context<T>>(
this->config_.device_id, this->config_.data_stream_id,
this->config_.compute_stream_id);
}
template <typename T, Precision P, OpRunType R>
void PaddleInferenceAnakinPredictor<T, P, R>::InitGraph() {
this->graph_p_ =
std::make_shared<anakin::graph::Graph<T, anakin::Precision::FP32>>();
if (!this->config_.model_file.empty()) {
this->graph_p_->load(this->config_.model_file);
} else if (this->config_.model_buf_p) {
this->graph_p_->load(this->config_.model_buf_p,
this->config_.model_buf_len);
} else {
LOG(FATAL) << "Model load error.";
}
this->input_names_ = this->graph_p_->get_ins();
this->output_names_ = this->graph_p_->get_outs();
for (auto &input_str : this->input_names_) {
if (this->config_.init_inputs_shape.find(input_str) ==
this->config_.init_inputs_shape.end()) {
LOG(FATAL) << input_str << " should be set in init_inputs_shape.";
}
std::vector<int> shape =
this->config_.init_inputs_shape.find(input_str)->second;
this->graph_p_->Reshape(input_str, shape);
}
}
template <typename T, Precision P, OpRunType R>
void PaddleInferenceAnakinPredictor<T, P, R>::OptimizeGraph() {
if (!this->graph_p_->Optimize()) {
LOG(FATAL) << "Graph optimization error.";
}
}
template <typename T, Precision P, OpRunType R>
void PaddleInferenceAnakinPredictor<T, P, R>::InitPredictor() {
this->InitEnv();
this->SetContext();
this->InitGraph();
this->OptimizeGraph();
this->InitNet();
}
template <typename T, Precision P, OpRunType R>
void PaddleInferenceAnakinPredictor<T, P, R>::Predict(int batch_size) {
anakin::TargetWrapper<T>::device_sync();
this->executor_p_->prediction();
anakin::TargetWrapper<T>::device_sync();
}
template <typename T, Precision P, OpRunType R>
bool PaddleInferenceAnakinPredictor<T, P, R>::Run(
const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data, int batch_size) {
if (this->config_.re_allocable) {
return this->RunImpl(inputs, output_data, batch_size);
} else {
// Run input data that exceeds the batch size in batches.
// 1. Reassign the batch size.
if (batch_size == -1) {
if (!inputs[0].lod.empty()) {
batch_size = inputs[0].lod[0].size() - 1;
} else {
batch_size = inputs[0].shape[0];
}
}
// 2. If the data don't need to be batched, run it directly.
if (batch_size <= this->config_.init_batch_size) {
return this->RunImpl(inputs, output_data);
}
// 3. Check the batch size and define temporary variables.
std::vector<PaddleTensor> cur_inputs;
std::vector<PaddleTensor> outputs_master;
std::vector<std::vector<paddle::PaddleTensor>> outputs_vec;
for (const auto &input : inputs) {
if (!input.lod.empty()) {
if (input.lod.size() != 1) {
return false;
}
if (input.lod[0].size() - 1 != batch_size) {
return false;
}
} else {
LOG(INFO) << "Non-lod mode to be implemented.";
return false;
}
PaddleTensor tensor;
tensor.name = input.name;
tensor.dtype = PaddleDType::FLOAT32;
cur_inputs.push_back(tensor);
}
for (auto output : *output_data) {
PaddleTensor tensor;
tensor.name = output.name;
outputs_master.push_back(tensor);
}
// 4. Batch execution.
for (size_t start_batch = 0; start_batch < batch_size;) {
auto end_batch = start_batch + this->config_.init_batch_size;
if (end_batch > batch_size) {
end_batch = batch_size;
}
auto cur_outputs = outputs_master;
for (size_t i = 0; i < inputs.size(); i++) {
auto start = inputs[i].lod[0][start_batch];
auto end = inputs[i].lod[0][end_batch];
std::vector<size_t> offsets;
for (size_t j = start_batch; j <= end_batch; j++) {
offsets.push_back(inputs[i].lod[0][j] -
inputs[i].lod[0][start_batch]);
}
auto mem_start = static_cast<float *>(inputs[i].data.data()) + start;
cur_inputs[i].data =
PaddleBuf(mem_start, (end - start) * sizeof(float));
cur_inputs[i].lod = std::vector<std::vector<size_t>>({offsets});
cur_inputs[i].shape =
std::vector<int>({static_cast<int>(end - start), 1, 1, 1});
}
if (!this->RunImpl(cur_inputs, &cur_outputs)) {
return false;
}
outputs_vec.push_back(cur_outputs);
start_batch = end_batch;
}
// 5. Copy the results to contiguous memory.
// Assume that each batch has the same final outputs size.
auto count = [](const std::vector<int> &v) {
int cnt = 1;
for_each(v.begin(), v.end(), [&cnt](int n) { cnt *= n; });
return cnt;
};
for (size_t i = 0; i < output_data->size(); i++) {
std::vector<int> shape = outputs_vec[i][0].shape;
shape[0] = batch_size;
int total_cnt = count(shape);
(*output_data)[i].shape = shape;
(*output_data)[i].data.Resize(total_cnt * sizeof(float));
float *addr = static_cast<float *>((*output_data)[i].data.data());
for (const auto &single_out : outputs_vec) {
int cnt = count(single_out[i].shape);
memcpy(addr, single_out[i].data.data(), cnt * sizeof(float));
addr += cnt;
}
}
}
return true;
}
template <typename T, Precision P, OpRunType R>
bool PaddleInferenceAnakinPredictor<T, P, R>::RunImpl(
const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data, int batch_size) {
anakin::TargetWrapper<T>::set_device(this->config_.device_id);
for (const auto &input : inputs) {
if (input.dtype != PaddleDType::FLOAT32) {
LOG(FATAL) << "Only support float type inputs. " << input.name
<< "'s type is not float";
}
auto d_tensor_p = this->executor_p_->get_in(input.name);
// For backward compatibility.
auto net_shape = d_tensor_p->shape();
if (net_shape.size() != input.shape.size()) {
LOG(FATAL) << " input " << input.name
<< "'s shape size should be equal to that of net";
}
#ifndef ANAKIN_MLU_PLACE
int sum = 1;
for_each(input.shape.begin(), input.shape.end(), [&](int n) { sum *= n; });
if (sum > net_shape.count()) {
if (this->config_.re_allocable) {
this->graph_p_->Reshape(input.name, input.shape);
this->InitNet();
d_tensor_p = this->executor_p_->get_in(input.name);
} else {
LOG(FATAL)
<< "Run failed because Anakin was expected not to reallocate "
"memory.";
}
}
#endif
std::vector<int> tmp_shape;
for (auto s : input.shape) {
tmp_shape.push_back(s);
}
auto *data = static_cast<float *>(input.data.data());
anakin::saber::Tensor<typename anakin::DefaultHostType<T>::Host_type>
h_tensor(data, typename anakin::DefaultHostType<T>::Host_type(), 0,
tmp_shape);
#ifndef ANAKIN_MLU_PLACE
d_tensor_p->reshape(tmp_shape);
#endif
if (input.lod.size() > 0) {
if (input.lod.size() > 1) {
LOG(FATAL) << " input lod first dim should <=1, but you set "
<< input.lod.size();
}
std::vector<int> lod(input.lod[0].begin(), input.lod[0].end());
std::vector<std::vector<int>> offset({lod});
d_tensor_p->set_seq_offset(offset);
VLOG(3) << "offset.size(): " << offset[0].size();
for (int i = 0; i < offset[0].size(); i++) {
VLOG(3) << offset[0][i];
}
}
d_tensor_p->copy_from(h_tensor);
}
this->Predict(batch_size);
if (output_data->empty()) {
LOG(FATAL) << "The output param in the Run function is incorrect.";
}
for (auto &output : *output_data) {
if (std::find(this->output_names_.begin(), this->output_names_.end(),
output.name) == this->output_names_.end()) {
LOG(FATAL) << output.name << " is not in the outputs of the graph.";
}
auto *d_tensor_p = this->executor_p_->get_out(output.name);
auto tmp_shape = d_tensor_p->valid_shape();
#ifdef ANAKIN_MLU_PLACE
tmp_shape.set_num(batch_size);
#endif
output.shape = tmp_shape;
if (output.data.length() < tmp_shape.count() * sizeof(float)) {
output.data.Resize(tmp_shape.count() * sizeof(float));
}
auto *data = static_cast<float *>(output.data.data());
anakin::saber::Tensor<typename anakin::DefaultHostType<T>::Host_type>
h_tensor(data, typename anakin::DefaultHostType<T>::Host_type(), 0,
tmp_shape);
h_tensor.copy_from(*d_tensor_p);
}
return true;
}
template <typename T, Precision P, OpRunType R>
bool PaddleInferenceAnakinPredictor<T, P, R>::Reset(
PaddleInferenceAnakinPredictor<T, P, R> *predictor) {
this->config_ = predictor->GetConfig();
this->graph_p_ = predictor->GetGraph();
this->input_names_ = predictor->GetInputNames();
this->output_names_ = predictor->GetOutputNames();
this->ctx_p_ = std::make_shared<anakin::Context<T>>(
this->config_.device_id, this->config_.data_stream_id,
this->config_.compute_stream_id);
this->InitNet();
return true;
}
template <typename T, Precision P, OpRunType R>
std::unique_ptr<PaddlePredictor>
PaddleInferenceAnakinPredictor<T, P, R>::New() {
return std::unique_ptr<PaddlePredictor>(
new PaddleInferenceAnakinPredictor<T, P, R>());
}
// The cloned Anakin predictor shares the same net weights as the original
// predictor.
template <typename T, Precision P, OpRunType R>
std::unique_ptr<PaddlePredictor>
PaddleInferenceAnakinPredictor<T, P, R>::Clone() {
VLOG(3) << "Anakin Predictor::clone";
std::unique_ptr<PaddlePredictor> cls = std::move(this->New());
auto anakin_predictor_p =
dynamic_cast<PaddleInferenceAnakinPredictor<T, P, R> *>(cls.get());
if (!anakin_predictor_p) {
LOG(FATAL) << "fail to call Init";
}
anakin_predictor_p->Reset(this);
return cls;
}
#ifdef ANAKIN_MLU_PLACE
template <Precision P, OpRunType R>
std::unique_ptr<PaddlePredictor>
PaddleInferenceAnakinMLUPredictor<P, R>::New() {
return std::unique_ptr<PaddlePredictor>(
new PaddleInferenceAnakinMLUPredictor<P, R>());
}
template <Precision P, OpRunType R>
void PaddleInferenceAnakinMLUPredictor<P, R>::SetContext() {
this->ctx_p_ = std::make_shared<anakin::Context<anakin::MLU>>(
this->config_.device_id, this->config_.data_stream_id,
this->config_.compute_stream_id);
this->ctx_p_->set_model_parallel(this->config_.model_parallel);
this->ctx_p_->set_fusion(this->config_.op_fuse);
this->ctx_p_->enable_batch_changable();
this->ctx_p_->enable_channel_duplicate();
}
template <Precision P, OpRunType R>
void PaddleInferenceAnakinMLUPredictor<P, R>::OptimizeGraph() {
if (!this->graph_p_->fusion_optimize(this->config_.op_fuse)) {
LOG(FATAL) << "Graph optimization error.";
}
}
template <Precision P, OpRunType R>
void PaddleInferenceAnakinMLUPredictor<P, R>::InitNet() {
std::unique_lock<std::mutex> lock(this->mutex_);
delete this->executor_p_;
this->executor_p_ = new anakin::Net<anakin::MLU, P, R>();
this->executor_p_->fusion_init(*this->graph_p_, this->ctx_p_, true);
}
template <Precision P, OpRunType R>
void PaddleInferenceAnakinMLUPredictor<P, R>::Predict(int batch_size) {
this->executor_p_->fusion_prediction(batch_size);
}
#endif
#ifdef ANAKIN_BM_PLACE
template <Precision P, OpRunType R>
std::unique_ptr<PaddlePredictor> PaddleInferenceAnakinBMPredictor<P, R>::New() {
return std::unique_ptr<PaddlePredictor>(
new PaddleInferenceAnakinBMPredictor<P, R>());
}
template <Precision P, OpRunType R>
void PaddleInferenceAnakinBMPredictor<P, R>::OptimizeGraph() {
if (!this->graph_p_->fusion_optimize()) {
LOG(FATAL) << "Graph optimization error.";
}
}
template <Precision P, OpRunType R>
void PaddleInferenceAnakinBMPredictor<P, R>::InitNet() {
std::unique_lock<std::mutex> lock(this->mutex_);
delete this->executor_p_;
this->executor_p_ = new anakin::Net<anakin::BM, P, R>();
this->executor_p_->fusion_init(*this->graph_p_, this->ctx_p_, true);
}
template <Precision P, OpRunType R>
void PaddleInferenceAnakinBMPredictor<P, R>::Predict(int batch_size) {
this->executor_p_->fusion_prediction();
}
#endif
#ifdef PADDLE_WITH_CUDA
template class PaddleInferenceAnakinPredictor<
anakin::NV, anakin::Precision::FP32, ::anakin::OpRunType::ASYNC>;
#endif
#ifdef ANAKIN_X86_PLACE
template class PaddleInferenceAnakinPredictor<
anakin::X86, anakin::Precision::FP32, ::anakin::OpRunType::ASYNC>;
#endif
#ifdef ANAKIN_MLU_PLACE
template class PaddleInferenceAnakinMLUPredictor<anakin::Precision::FP32,
::anakin::OpRunType::SYNC>;
#endif
#ifdef ANAKIN_BM_PLACE
template class PaddleInferenceAnakinBMPredictor<anakin::Precision::FP32,
::anakin::OpRunType::ASYNC>;
#endif
// A factory to help create different predictors.
template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
const contrib::AnakinConfig &config) {
#ifdef PADDLE_WITH_CUDA
if (config.target_type == contrib::AnakinConfig::NVGPU) {
return std::unique_ptr<PaddlePredictor>(
new PaddleInferenceAnakinPredictor<anakin::NV, anakin::Precision::FP32,
::anakin::OpRunType::ASYNC>(config));
}
#endif
#ifdef ANAKIN_X86_PLACE
if (config.target_type == contrib::AnakinConfig::X86) {
return std::unique_ptr<PaddlePredictor>(
new PaddleInferenceAnakinPredictor<anakin::X86, anakin::Precision::FP32,
::anakin::OpRunType::ASYNC>(config));
}
#endif
#ifdef ANAKIN_MLU_PLACE
if (config.target_type == contrib::AnakinConfig::MLU) {
return std::unique_ptr<PaddlePredictor>(
new PaddleInferenceAnakinMLUPredictor<anakin::Precision::FP32,
::anakin::OpRunType::SYNC>(
config));
}
#endif
#ifdef ANAKIN_BM_PLACE
if (config.target_type == contrib::AnakinConfig::BM) {
return std::unique_ptr<PaddlePredictor>(
new PaddleInferenceAnakinBMPredictor<anakin::Precision::FP32,
::anakin::OpRunType::ASYNC>(
config));
}
#endif
LOG(FATAL) << "Anakin Predictor create on unknown platform: "
<< config.target_type;
return nullptr;
}
template <typename T, Precision P, OpRunType R>
void DisplayOpTimer(anakin::Net<T, P, R> *net_executor, int epoch) {
#ifdef PADDLE_ANAKIN_ENABLE_OP_TIMER
std::vector<float> op_time = net_executor->get_op_time();
auto exec_funcs = net_executor->get_exec_funcs();
auto op_param = net_executor->get_op_param();
for (int i = 0; i < op_time.size(); i++) {
LOG(INFO) << "name: " << exec_funcs[i].name
<< " op_type: " << exec_funcs[i].op_name
<< " op_param: " << op_param[i] << " time " << op_time[i] / epoch;
}
std::map<std::string, float> op_map;
for (int i = 0; i < op_time.size(); i++) {
auto it = op_map.find(op_param[i]);
if (it != op_map.end())
op_map[op_param[i]] += op_time[i];
else
op_map.insert(std::pair<std::string, float>(op_param[i], op_time[i]));
}
for (auto it = op_map.begin(); it != op_map.end(); ++it) {
LOG(INFO) << it->first << " " << (it->second) / epoch << " ms";
}
#endif
}
template <typename T, Precision P, OpRunType R>
PaddleInferenceAnakinPredictor<T, P, R>::~PaddleInferenceAnakinPredictor() {
DisplayOpTimer<T, P, R>(this->executor_p_, this->config_.init_batch_size);
delete this->executor_p_;
this->executor_p_ = nullptr;
}
} // namespace paddle
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file contains the implementation of the inference API with the Anakin
* engine embedded; this API can only support Anakin models.
*/
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "framework/core/net/net.h"
#include "framework/graph/graph.h"
#include "paddle/fluid/inference/api/paddle_anakin_config.h"
#include "saber/core/shape.h"
#include "saber/saber_types.h"
namespace paddle {
using contrib::AnakinConfig;
using anakin::Precision;
using anakin::OpRunType;
template <typename T, Precision P, OpRunType R>
class PaddleInferenceAnakinPredictor : public PaddlePredictor {
public:
PaddleInferenceAnakinPredictor() = default;
explicit PaddleInferenceAnakinPredictor(const AnakinConfig& config)
: config_(config) {
this->InitPredictor();
}
// NOTE Unlike the native engine, the buffers of anakin engine's output_data
// should be allocated first.
bool Run(const std::vector<PaddleTensor>& inputs,
std::vector<PaddleTensor>* output_data,
int batch_size = -1) override;
std::unique_ptr<PaddlePredictor> Clone() override;
bool Reset(PaddleInferenceAnakinPredictor<T, P, R>* predictor);
void InitPredictor();
std::shared_ptr<anakin::graph::Graph<T, P>> GetGraph() {
return this->graph_p_;
}
std::vector<std::string> GetInputNames() override {
return this->input_names_;
}
std::vector<std::string> GetOutputNames() override {
return this->output_names_;
}
const AnakinConfig& GetConfig() const { return this->config_; }
~PaddleInferenceAnakinPredictor() override;
protected:
void InitEnv();
void InitGraph();
virtual void OptimizeGraph();
virtual void InitNet();
virtual void SetContext();
virtual void Predict(int batch_size);
virtual std::unique_ptr<PaddlePredictor> New();
static std::mutex mutex_;
AnakinConfig config_;
std::shared_ptr<anakin::Context<T>> ctx_p_;
std::shared_ptr<anakin::graph::Graph<T, P>> graph_p_;
anakin::Net<T, P, R>* executor_p_{nullptr};
std::vector<std::string> input_names_;
std::vector<std::string> output_names_;
private:
bool RunImpl(const std::vector<PaddleTensor>& inputs,
std::vector<PaddleTensor>* output_data, int batch_size = -1);
static std::once_flag init_anakin_;
};
#ifdef ANAKIN_MLU_PLACE
template <Precision P, OpRunType R>
class PaddleInferenceAnakinMLUPredictor final
: public PaddleInferenceAnakinPredictor<anakin::MLU, P, R> {
public:
PaddleInferenceAnakinMLUPredictor() = default;
explicit PaddleInferenceAnakinMLUPredictor(const AnakinConfig& config) {
this->config_ = config;
this->InitPredictor();
}
std::unique_ptr<PaddlePredictor> New() override;
void SetContext() override;
void OptimizeGraph() override;
void InitNet() override;
void Predict(int batch_size) override;
};
#endif
#ifdef ANAKIN_BM_PLACE
template <Precision P, OpRunType R>
class PaddleInferenceAnakinBMPredictor final
: public PaddleInferenceAnakinPredictor<anakin::BM, P, R> {
public:
PaddleInferenceAnakinBMPredictor() = default;
explicit PaddleInferenceAnakinBMPredictor(const AnakinConfig& config) {
this->config_ = config;
this->InitPredictor();
}
std::unique_ptr<PaddlePredictor> New() override;
void OptimizeGraph() override;
void InitNet() override;
void Predict(int batch_size) override;
};
#endif
} // namespace paddle
...@@ -24,17 +24,16 @@ that is important when there are multiple inputs and need to distinguish which v ...@@ -24,17 +24,16 @@ that is important when there are multiple inputs and need to distinguish which v
## engine ## engine
The inference APIs have two different underlying engines
- the native engine, which consists of the native operators and framework, - the native engine
- the Anakin engine, which has an Anakin library embedded. - the tensorrt engine
The native engine takes a native Paddle model as input, and supports any model trained by Paddle, The native engine, which consists of the native operators and framework, takes a native Paddle model
the Anakin engine is faster for some models, as input, and supports any model trained by Paddle.
but it can only take the Anakin model as input (the user needs to transform the format manually first) and currently not all Paddle models are supported.
```c++ ```c++
enum class PaddleEngineKind { enum class PaddleEngineKind {
kNative = 0, // Use the native Fluid facility. kNative = 0, // Use the native Fluid facility.
kAnakin, // Use Anakin for inference. kAutoMixedTensorRT // Automatically mixing TensorRT with the Fluid ops.
}; };
``` ```
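For reference, creating a predictor on the native engine might look like the sketch below; this assumes the classic `NativeConfig` fields and the two-argument `CreatePaddlePredictor` template, and the model directory is only a placeholder:

```c++
#include <memory>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

// A minimal sketch: build a predictor that runs on the native engine.
// "./mobilenet_v1" is a placeholder for a directory holding the saved model.
std::unique_ptr<paddle::PaddlePredictor> CreateNativePredictor() {
  paddle::NativeConfig config;
  config.model_dir = "./mobilenet_v1";
  config.use_gpu = false;  // run on CPU; set to true (plus device fields) for GPU
  return paddle::CreatePaddlePredictor<paddle::NativeConfig,
                                       paddle::PaddleEngineKind::kNative>(config);
}
```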
......
...@@ -29,10 +29,9 @@ struct PaddleTensor { ...@@ -29,10 +29,9 @@ struct PaddleTensor {
## engine ## engine
The high-level API has several optimized implementations underneath, which we call engines. The engines currently available are:
- the native engine, composed of paddle's native forward operators; it naturally supports all models trained by paddle,
- the Anakin engine, which wraps [Anakin](https://github.com/PaddlePaddle/Anakin); it performs well on some models, but only accepts its own model format and cannot support all paddle models,
- the TensorRT mixed engine, which integrates [TensorRT](https://developer.nvidia.com/tensorrt) via subgraphs; it supports all paddle models and automatically carves out parts of the computation graph to run on TensorRT for acceleration (WIP)
It is implemented as
...@@ -40,7 +39,6 @@ struct PaddleTensor { ...@@ -40,7 +39,6 @@ struct PaddleTensor {
```c++ ```c++
enum class PaddleEngineKind { enum class PaddleEngineKind {
kNative = 0, // Use the native Fluid facility. kNative = 0, // Use the native Fluid facility.
kAnakin, // Use Anakin for inference.
kAutoMixedTensorRT // Automatically mixing TensorRT with the Fluid ops. kAutoMixedTensorRT // Automatically mixing TensorRT with the Fluid ops.
}; };
``` ```
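As an illustration of the TensorRT mixed engine described above, the usual way to reach it today is through `AnalysisConfig`. The following is a minimal sketch, assuming the `EnableTensorRtEngine` signature of this release; the model path and numeric arguments are placeholders:

```c++
#include <memory>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

// A minimal sketch: offload TensorRT-friendly subgraphs to TensorRT and run
// the remainder on the native operators. Paths and values are placeholders.
std::unique_ptr<paddle::PaddlePredictor> CreateTrtPredictor() {
  paddle::AnalysisConfig config;
  config.SetModel("./mobilenet_v1");            // model directory (placeholder)
  config.EnableUseGpu(100 /*MB init pool*/, 0 /*GPU id*/);
  config.EnableTensorRtEngine(1 << 20 /*workspace_size*/, 1 /*max_batch_size*/,
                              3 /*min_subgraph_size*/,
                              paddle::AnalysisConfig::Precision::kFloat32,
                              false /*use_static*/, false /*use_calib_mode*/);
  return paddle::CreatePaddlePredictor(config);
}
```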
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cassert>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "paddle_api.h" // NOLINT
namespace paddle {
namespace contrib {
// Configurations for Anakin engine.
struct AnakinConfig : public PaddlePredictor::Config {
enum TargetType { NVGPU = 0, X86, MLU, BM };
int device_id{0};
std::string model_file;
std::map<std::string, std::vector<int>> init_inputs_shape;
int init_batch_size{-1};
bool re_allocable{true};
int max_stream{4};
int data_stream_id{0};
int compute_stream_id{0};
char* model_buf_p{nullptr};
size_t model_buf_len{0};
TargetType target_type;
#ifdef ANAKIN_MLU_PLACE
int model_parallel{8};
int data_parallel{1};
bool op_fuse{false};
bool sparse{false};
#endif
};
} // namespace contrib
} // namespace paddle
...@@ -161,25 +161,13 @@ struct AnalysisConfig { ...@@ -161,25 +161,13 @@ struct AnalysisConfig {
*/ */
bool tensorrt_engine_enabled() const { return use_tensorrt_; } bool tensorrt_engine_enabled() const { return use_tensorrt_; }
/** /**
* \brief Turn on the usage of Anakin sub-graph engine. * \brief Turn on the usage of Lite sub-graph engine.
*/ */
void EnableAnakinEngine(
int max_batch_size = 1,
std::map<std::string, std::vector<int>> max_input_shape = {},
int min_subgraph_size = 6, Precision precision = Precision::kFloat32,
bool auto_config_layout = false,
std::vector<std::string> passes_filter = {},
std::vector<std::string> ops_filter = {});
void EnableLiteEngine( void EnableLiteEngine(
AnalysisConfig::Precision precision_mode = Precision::kFloat32, AnalysisConfig::Precision precision_mode = Precision::kFloat32,
const std::vector<std::string>& passes_filter = {}, const std::vector<std::string>& passes_filter = {},
const std::vector<std::string>& ops_filter = {}); const std::vector<std::string>& ops_filter = {});
/** A boolean state indicating whether the Anakin sub-graph engine is used.
*/
bool anakin_engine_enabled() const { return use_anakin_; }
/** A boolean state indicating whether the Lite sub-graph engine is used. /** A boolean state indicating whether the Lite sub-graph engine is used.
*/ */
bool lite_engine_enabled() const { return use_lite_; } bool lite_engine_enabled() const { return use_lite_; }
...@@ -350,15 +338,6 @@ struct AnalysisConfig { ...@@ -350,15 +338,6 @@ struct AnalysisConfig {
mutable std::unique_ptr<PassStrategy> pass_builder_; mutable std::unique_ptr<PassStrategy> pass_builder_;
bool use_anakin_{false};
int anakin_max_batchsize_;
int anakin_min_subgraph_size_{6};
std::map<std::string, std::vector<int>> anakin_max_input_shape_;
Precision anakin_precision_mode_;
bool anakin_auto_config_layout_{false};
std::vector<std::string> anakin_passes_filter_;
std::vector<std::string> anakin_ops_filter_;
bool use_lite_{false}; bool use_lite_{false};
std::vector<std::string> lite_passes_filter_; std::vector<std::string> lite_passes_filter_;
std::vector<std::string> lite_ops_filter_; std::vector<std::string> lite_ops_filter_;
......
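The `EnableLiteEngine` / `lite_engine_enabled` pair kept in the config above might be exercised roughly as follows. This is a minimal sketch: the model path is a placeholder and the empty filter lists simply take the defaults shown in the declaration:

```c++
#include <cassert>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

// A minimal sketch: switch an AnalysisConfig over to the Lite sub-graph engine.
// The model path is a placeholder; the filter lists are deliberately left empty.
void ConfigureLiteEngine(paddle::AnalysisConfig* config) {
  config->SetModel("./mobilenet_v1");
  config->EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32,
                           /*passes_filter=*/{},
                           /*ops_filter=*/{});
  assert(config->lite_engine_enabled());
}
```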
...@@ -352,7 +352,6 @@ enum class PaddleEngineKind { ...@@ -352,7 +352,6 @@ enum class PaddleEngineKind {
kNative = 0, /*!< Use the native Fluid facility. */ kNative = 0, /*!< Use the native Fluid facility. */
kAutoMixedTensorRT, /*!< Automatically mix Fluid with TensorRT. */ kAutoMixedTensorRT, /*!< Automatically mix Fluid with TensorRT. */
kAnalysis, /*!< More optimization. */ kAnalysis, /*!< More optimization. */
kAnakin /*!< Use Anakin for inference, not mature yet. */
}; };
template <typename ConfigT, PaddleEngineKind engine> template <typename ConfigT, PaddleEngineKind engine>
......
...@@ -28,6 +28,3 @@ limitations under the License. */ ...@@ -28,6 +28,3 @@ limitations under the License. */
#include "paddle_analysis_config.h" // NOLINT #include "paddle_analysis_config.h" // NOLINT
#include "paddle_api.h" // NOLINT #include "paddle_api.h" // NOLINT
#if (defined PADDLE_WITH_ANAKIN)
#include "paddle_anakin_config.h" // NOLINT
#endif
...@@ -92,19 +92,6 @@ const std::vector<std::string> kTRTSubgraphPasses({ ...@@ -92,19 +92,6 @@ const std::vector<std::string> kTRTSubgraphPasses({
"transpose_flatten_concat_fuse_pass", "transpose_flatten_concat_fuse_pass",
}); });
// The following passes works for Anakin sub-graph engine.
const std::vector<std::string> kAnakinSubgraphPasses({
"quant_conv2d_dequant_fuse_pass", //
"simplify_anakin_priorbox_detection_out_pass", //
"fillconstant_elementwisemul_fuse", //
"fc_fuse_pass", //
"conv_elementwise_add_fuse_pass", //
"fc_gru_fuse_pass", //
"shuffle_channel_detect_pass", //
"anakin_subgraph_pass", //
"fc_gru_fuse_pass", //
});
const std::vector<std::string> kLiteSubgraphPasses({ const std::vector<std::string> kLiteSubgraphPasses({
#ifdef PADDLE_WITH_LITE #ifdef PADDLE_WITH_LITE
"lite_subgraph_pass", "lite_subgraph_pass",
......
...@@ -162,7 +162,6 @@ class GpuPassStrategy : public PassStrategy { ...@@ -162,7 +162,6 @@ class GpuPassStrategy : public PassStrategy {
}; };
extern const std::vector<std::string> kTRTSubgraphPasses; extern const std::vector<std::string> kTRTSubgraphPasses;
extern const std::vector<std::string> kAnakinSubgraphPasses;
extern const std::vector<std::string> kLiteSubgraphPasses; extern const std::vector<std::string> kLiteSubgraphPasses;
} // namespace paddle } // namespace paddle
...@@ -188,17 +188,6 @@ typedef struct PD_MaxInputShape { ...@@ -188,17 +188,6 @@ typedef struct PD_MaxInputShape {
int shape_size; int shape_size;
} PD_MaxInputShape; } PD_MaxInputShape;
PADDLE_CAPI_EXPORT extern void PD_EnableAnakinEngine(
PD_AnalysisConfig* config, int max_batch_size = 1,
PD_MaxInputShape* max_input_shape = NULL, int max_input_shape_size = 0,
int min_subgraph_size = 6, Precision precision = Precision::kFloat32,
bool auto_config_layout = false, char** passes_filter = NULL,
int passes_filter_size = 0, char** ops_filter = NULL,
int ops_filter_size = 0);
PADDLE_CAPI_EXPORT extern bool PD_AnakinEngineEnabled(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_SwitchIrDebug(PD_AnalysisConfig* config, PADDLE_CAPI_EXPORT extern void PD_SwitchIrDebug(PD_AnalysisConfig* config,
bool x = true); bool x = true);
......
...@@ -165,42 +165,6 @@ bool PD_TensorrtEngineEnabled(const PD_AnalysisConfig* config) { ...@@ -165,42 +165,6 @@ bool PD_TensorrtEngineEnabled(const PD_AnalysisConfig* config) {
return config->config.tensorrt_engine_enabled(); return config->config.tensorrt_engine_enabled();
} }
void PD_EnableAnakinEngine(PD_AnalysisConfig* config, int max_batch_size,
PD_MaxInputShape* max_input_shape,
int max_input_shape_size, int min_subgraph_size,
Precision precision, bool auto_config_layout,
char** passes_filter, int passes_filter_size,
char** ops_filter, int ops_filter_size) {
PADDLE_ENFORCE_NOT_NULL(config);
std::map<std::string, std::vector<int>> mis;
if (max_input_shape) {
for (int i = 0; i < max_input_shape_size; ++i) {
std::vector<int> tmp_shape;
tmp_shape.assign(
max_input_shape[i].shape,
max_input_shape[i].shape + max_input_shape[i].shape_size);
mis[std::string(max_input_shape[i].name)] = std::move(tmp_shape);
}
}
std::vector<std::string> pf;
std::vector<std::string> of;
if (passes_filter) {
pf.assign(passes_filter, passes_filter + passes_filter_size);
}
if (ops_filter) {
of.assign(ops_filter, ops_filter + ops_filter_size);
}
config->config.EnableAnakinEngine(max_batch_size, mis, min_subgraph_size,
paddle::ConvertToACPrecision(precision),
auto_config_layout, pf, of);
}
bool PD_AnakinEngineEnabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
return config->config.anakin_engine_enabled();
}
void PD_SwitchIrDebug(PD_AnalysisConfig* config, bool x) { void PD_SwitchIrDebug(PD_AnalysisConfig* config, bool x) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(config);
config->config.SwitchIrDebug(x); config->config.SwitchIrDebug(x);
......
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
DEFINE_string(model, "", "Directory of the inference model.");
namespace paddle {
contrib::AnakinConfig Config() {
// Determine the use of memory here.
std::map<std::string, std::vector<int>> init_inputs_shape;
init_inputs_shape["input_0"] = std::vector<int>({1, 3, 112, 112});
contrib::AnakinConfig config;
config.target_type = contrib::AnakinConfig::MLU;
config.model_file = FLAGS_model;
config.init_inputs_shape = init_inputs_shape;
// Determine the device execution context.
config.device_id = 0;
config.data_stream_id = 0;
config.compute_stream_id = 0;
// Set re_allocable and op_fuse TRUE.
config.re_allocable = true;
config.op_fuse = true;
return config;
}
void single_test() {
// 1. Defining basic data structures.
auto config = paddle::Config();
auto predictor =
paddle::CreatePaddlePredictor<paddle::contrib::AnakinConfig,
paddle::PaddleEngineKind::kAnakin>(config);
// 2. Define the data structure of the predictor inputs and outputs.
std::vector<paddle::PaddleTensor> input_tensors;
std::vector<paddle::PaddleTensor> output_tensors;
// 3. Define and fill the inputs tensor.
int num = 1;
int channel = 3;
int height = 112;
int width = 112;
std::vector<float> input(num * channel * height * width, 1);
std::vector<std::vector<float>> inputs({input});
const std::vector<std::string> input_names{"input_0"};
for (auto& name : input_names) {
paddle::PaddleTensor tensor;
tensor.name = name;
tensor.dtype = PaddleDType::FLOAT32;
input_tensors.push_back(tensor);
}
for (size_t j = 0; j < input_tensors.size(); j++) {
input_tensors[j].data =
paddle::PaddleBuf(&inputs[j][0], inputs[j].size() * sizeof(float));
// The shape of each execution can be changed.
input_tensors[j].shape = std::vector<int>({num, channel, height, width});
}
// 4. Set the output placeholder of predictor.
PaddleTensor predict_out, score_out;
predict_out.name = "landmark_predict_out";
score_out.name = "landmark_score_out";
output_tensors.push_back(predict_out);
output_tensors.push_back(score_out);
// 5. Execution predict.
predictor->Run(input_tensors, &output_tensors);
// 6. Take out the output data.
for (auto out : output_tensors) {
float* data_o = static_cast<float*>(out.data.data());
LOG(INFO) << out.name << " size = " << out.data.length() / sizeof(float);
}
}
} // namespace paddle
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
paddle::single_test();
return 0;
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include <cmath>
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#define BUFFER_SIZE (10000)
#define COMPARE_OUTPUTS (1)
#define PRINT_INPUTS (0)
DEFINE_string(model, "", "Directory of the inference model.");
DEFINE_string(datapath, "", "Path of the dataset.");
DEFINE_string(truthpath, "", "Path of the ground-truth file.");
DEFINE_int32(batch_size, 1, "Batch size per execution.");
DEFINE_int32(repeats, 1, "Number of iterations.");
DEFINE_int32(
start_line, 0,
"The starting line of the text file read (this line will be read).");
DEFINE_int32(end_line, 1000000,
"The ending line of the text file read (this line will be read).");
DEFINE_int32(init_batch_size, 40,
"Max batch size for Anakin memory allocation.");
DEFINE_int32(threads_num, 2, "Threads num for Anakin.");
class Data {
public:
Data(std::string file_name, size_t batch_size, size_t start = 0,
size_t end = 1000000)
: _batch_size(batch_size), _total_length(0), _inputs_size(6) {
_file.open(file_name);
_file.seekg(_file.end);
_total_length = _file.tellg();
_file.seekg(_file.beg);
read_file_to_vec(start, end);
reset_current_line();
}
void reset_current_line();
const std::vector<std::string>& get_lines();
void read_file_to_vec(const size_t start, const size_t end);
int get_next_batches(std::vector<std::vector<float>>* inputs,
std::vector<std::vector<size_t>>* seq_offsets);
private:
std::fstream _file;
int _batch_size;
size_t _total_length;
size_t _inputs_size;
std::vector<std::string> _lines;
size_t _current_line;
};
void Data::read_file_to_vec(const size_t start, const size_t end) {
std::string line;
size_t count = 0;
_lines.clear();
while (std::getline(_file, line)) {
if (count >= start && count <= end) {
_lines.push_back(line);
}
count++;
}
}
const std::vector<std::string>& Data::get_lines() { return _lines; }
void Data::reset_current_line() { _current_line = 0; }
int Data::get_next_batches(std::vector<std::vector<float>>* data,
std::vector<std::vector<size_t>>* offsets) {
data->clear();
offsets->clear();
data->resize(_inputs_size);
offsets->resize(_inputs_size);
for (auto& offset : *offsets) {
offset.push_back(0);
}
int seq_num = -1;
int pre_query_index = -1;
while (_current_line < _lines.size()) {
int cur_query_index = -1;
std::vector<std::string> line;
paddle::inference::split(_lines[_current_line], ';', &line);
for (size_t i = 0; i < line.size(); i++) {
std::vector<float> float_v;
paddle::inference::split_to_float(line[i], ' ', &float_v);
if (i == 0) {
cur_query_index = float_v[0];
if (pre_query_index != -1 && cur_query_index != pre_query_index) {
return seq_num;
}
seq_num++;
_current_line++;
} else {
if (float_v.size() == 0) {
float_v.push_back(-1);
}
(*data)[i - 1].insert((*data)[i - 1].end(), float_v.begin(),
float_v.end());
(*offsets)[i - 1].push_back((*offsets)[i - 1][seq_num] +
float_v.size());
}
}
if (seq_num + 1 >= _batch_size) {
return seq_num;
} else {
pre_query_index = cur_query_index;
}
}
return seq_num;
}
namespace paddle {
contrib::AnakinConfig GetConfig() {
contrib::AnakinConfig config;
std::map<std::string, std::vector<int>> init_inputs_shape;
init_inputs_shape["q_basic"] = std::vector<int>({1000, 1, 1, 1});
init_inputs_shape["q_bigram0"] = std::vector<int>({1000, 1, 1, 1});
init_inputs_shape["pt_basic"] = std::vector<int>({2000, 1, 1, 1});
init_inputs_shape["pa_basic"] = std::vector<int>({4000, 1, 1, 1});
init_inputs_shape["pa_bigram0"] = std::vector<int>({4000, 1, 1, 1});
init_inputs_shape["pt_bigram0"] = std::vector<int>({2000, 1, 1, 1});
// using AnakinConfig::X86 if you need to use cpu to do inference
config.target_type = contrib::AnakinConfig::NVGPU;
config.model_file = FLAGS_model;
config.device_id = 0;
config.init_batch_size = FLAGS_init_batch_size;
config.init_inputs_shape = init_inputs_shape;
config.re_allocable = false;
return config;
}
void single_test(PaddlePredictor* predictor_master) {
auto predictor = predictor_master->Clone();
Data data(FLAGS_datapath, FLAGS_batch_size, FLAGS_start_line, FLAGS_end_line);
std::vector<std::vector<float>> inputs;
std::vector<std::vector<size_t>> seq_offsets;
std::vector<float> compare_outputs;
const std::vector<std::string> input_names{"q_basic", "q_bigram0",
"pt_basic", "pt_bigram0",
"pa_basic", "pa_bigram0"};
std::vector<PaddleTensor> input_tensors;
std::vector<PaddleTensor> output_tensors;
for (auto& name : input_names) {
PaddleTensor tensor;
tensor.name = name;
tensor.dtype = PaddleDType::FLOAT32;
input_tensors.push_back(tensor);
}
PaddleTensor tensor_out;
tensor_out.name = "save_infer_model/scale_0";
tensor_out.shape = std::vector<int>({});
tensor_out.data = PaddleBuf();
tensor_out.dtype = PaddleDType::FLOAT32;
output_tensors.push_back(tensor_out);
inference::Timer timer;
for (int i = 0; i < FLAGS_repeats; i++) {
data.reset_current_line();
size_t count = 0;
float time_sum = 0;
while (data.get_next_batches(&inputs, &seq_offsets) >= 0) {
#if PRINT_INPUTS
for (size_t i = 0; i < inputs.size(); i++) {
LOG(INFO) << "data " << i;
for (size_t j = 0; j < inputs[i].size(); j++) {
LOG(INFO) << j << ": " << inputs[i][j];
}
for (auto j : seq_offsets[i]) {
LOG(INFO) << "offsets: " << i << ": " << j;
}
}
#endif
for (size_t j = 0; j < input_tensors.size(); j++) {
input_tensors[j].data =
PaddleBuf(&inputs[j][0], inputs[j].size() * sizeof(float));
input_tensors[j].lod =
std::vector<std::vector<size_t>>({seq_offsets[j]});
input_tensors[j].shape =
std::vector<int>({static_cast<int>(inputs[j].size()), 1, 1, 1});
}
timer.tic();
predictor->Run(input_tensors, &output_tensors);
float time = timer.toc();
#if COMPARE_OUTPUTS
float* data_o = static_cast<float*>(output_tensors[0].data.data());
LOG(INFO) << "outputs[0].data.size() = "
<< output_tensors[0].data.length() / sizeof(float);
size_t sum = 1;
for_each(output_tensors[0].shape.begin(), output_tensors[0].shape.end(),
[&](int n) { sum *= n; });
for (size_t j = 0; j < sum; ++j) {
LOG(INFO) << "output[" << j << "]: " << data_o[j];
compare_outputs.push_back(data_o[j]);
}
#endif
LOG(INFO) << "Single Time: " << time;
count++;
if (count > 10) {
time_sum += timer.toc();
}
}
inference::PrintTime(FLAGS_batch_size, FLAGS_repeats, 1, 0,
time_sum / (count - 10));
#if COMPARE_OUTPUTS
Data data(FLAGS_truthpath, 1);
const std::vector<std::string> truth_vals = data.get_lines();
for (size_t j = 0; j < truth_vals.size(); j++) {
float truth = std::atof(truth_vals[j].c_str());
float compa = compare_outputs[j];
float diff = std::abs(truth - compa);
LOG(INFO) << "[DIFF " << j << " ] " << diff;
if (diff > 0.0001) {
LOG(FATAL) << "The result is wrong!";
}
}
LOG(INFO) << "The result is correct!";
#endif
}
}
} // namespace paddle
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
std::vector<std::thread> threads;
auto config = paddle::GetConfig();
config.data_stream_id = 0;
config.compute_stream_id = 0;
std::unique_ptr<paddle::PaddlePredictor> predictor_master =
paddle::CreatePaddlePredictor<paddle::contrib::AnakinConfig,
paddle::PaddleEngineKind::kAnakin>(config);
for (int i = 0; i < FLAGS_threads_num; i++) {
threads.push_back(std::thread(paddle::single_test, predictor_master.get()));
}
for (auto& t : threads) {
t.join();
}
return 0;
}
...@@ -92,9 +92,6 @@ TEST(PD_AnalysisConfig, profile_mkldnn) { ...@@ -92,9 +92,6 @@ TEST(PD_AnalysisConfig, profile_mkldnn) {
CHECK(quantizer_enable) << "NO"; CHECK(quantizer_enable) << "NO";
PD_SetMkldnnCacheCapacity(config, 0); PD_SetMkldnnCacheCapacity(config, 0);
PD_SetModel(config, prog_file.c_str(), params_file.c_str()); PD_SetModel(config, prog_file.c_str(), params_file.c_str());
PD_EnableAnakinEngine(config);
bool anakin_enable = PD_AnakinEngineEnabled(config);
LOG(INFO) << anakin_enable;
PD_DeleteAnalysisConfig(config); PD_DeleteAnalysisConfig(config);
} }
#endif #endif
......
...@@ -34,10 +34,6 @@ if (WITH_GPU AND TENSORRT_FOUND) ...@@ -34,10 +34,6 @@ if (WITH_GPU AND TENSORRT_FOUND)
add_subdirectory(tensorrt) add_subdirectory(tensorrt)
endif() endif()
if (ANAKIN_SUBGRAPH)
add_subdirectory(anakin)
endif()
if (WITH_LITE) if (WITH_LITE)
add_subdirectory(lite) add_subdirectory(lite)
endif() endif()
......
op_library(anakin_engine_op DEPS anakin_engine anakin_op_converter)
# file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(anakin_engine);\n")
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#include <string>
#include <vector>
#include "paddle/fluid/operators/anakin/anakin_engine_op.h"
namespace paddle {
namespace operators {
class AnakinEngineOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Xs", "A list of inputs.").AsDuplicable();
AddOutput("Ys", "A list of outputs").AsDuplicable();
AddAttr<std::string>("subgraph", "the subgraph.");
AddAttr<std::string>(
"engine_key",
"The engine_key here is used to distinguish different TRT Engines");
AddAttr<framework::BlockDesc *>("sub_block", "the trt block");
AddComment("Anakin engine operator.");
}
};
class AnakinEngineInferVarType : public framework::VarTypeInference {
public:
void operator()(framework::InferVarTypeContext *ctx) const override {}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(anakin_engine, ops::AnakinEngineOp, ops::AnakinEngineOpMaker,
ops::AnakinEngineOpMaker);
#endif // PADDLE_WITH_CUDA
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_CUDA
#include <fstream>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
#include "paddle/fluid/inference/anakin/engine.h"
#include "paddle/fluid/inference/analysis/helper.h"
namespace paddle {
namespace operators {
using inference::Singleton;
using inference::anakin::AnakinEngine;
class AnakinEngineOp : public framework::OperatorBase {
private:
std::vector<std::string> input_names_;
std::unordered_set<std::string> param_names_;
std::string engine_key_;
std::string engine_serialized_data_;
bool use_gpu_;
bool enable_int8_;
public:
AnakinEngineOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: framework::OperatorBase(type, inputs, outputs, attrs) {
input_names_ = Inputs("Xs");
engine_key_ = Attr<std::string>("engine_key");
auto params = Attr<std::vector<std::string>>("parameters");
use_gpu_ = Attr<bool>("use_gpu");
enable_int8_ = Attr<bool>("enable_int8");
for (const auto &param : params) {
param_names_.insert(param);
}
}
protected:
void RunImpl(const framework::Scope &scope,
const platform::Place &dev_place) const override {
RunAnakin(scope, dev_place);
}
void RunAnakin(const framework::Scope &scope,
const platform::Place &dev_place) const {
PADDLE_ENFORCE(!input_names_.empty(), "should pass at least one input");
std::vector<std::string> output_maps =
Attr<std::vector<std::string>>("output_name_mapping");
std::map<std::string, framework::LoDTensor *> inputs;
for (const auto &x : Inputs("Xs")) {
if (param_names_.count(x)) continue;
auto &t =
inference::analysis::GetFromScope<framework::LoDTensor>(scope, x);
inputs.insert({x, &t});
}
std::map<std::string, framework::LoDTensor *> outputs;
int output_index = 0;
for (const auto &y : Outputs("Ys")) {
auto *fluid_v = scope.FindVar(y);
PADDLE_ENFORCE_NOT_NULL(fluid_v, "no output variable called %s", y);
auto *fluid_t = fluid_v->GetMutable<framework::LoDTensor>();
outputs.insert({output_maps[output_index], fluid_t});
output_index += 1;
}
if (enable_int8_) {
Execute<::anakin::Precision::INT8>(inputs, outputs, dev_place);
} else {
Execute<::anakin::Precision::FP32>(inputs, outputs, dev_place);
}
}
template <::anakin::Precision PrecisionT>
void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
const std::map<std::string, framework::LoDTensor *> &outputs,
const platform::Place &dev_place) const {
if (use_gpu_) {
#ifdef PADDLE_WITH_CUDA
platform::DeviceContextPool &pool =
platform::DeviceContextPool::Instance();
auto &dev_ctx = *pool.Get(dev_place);
auto stream =
reinterpret_cast<const platform::CUDADeviceContext &>(dev_ctx)
.stream();
auto *engine =
inference::Singleton<inference::anakin::AnakinEngineManager<
::anakin::saber::NV, PrecisionT>>::Global()
.Get(engine_key_);
engine->Execute(inputs, outputs, stream);
#endif
} else {
#ifdef ANAKIN_X86_PLACE
auto *engine =
inference::Singleton<inference::anakin::AnakinEngineManager<
::anakin::saber::X86, PrecisionT>>::Global()
.Get(engine_key_);
engine->Execute(inputs, outputs);
#else
LOG(FATAL) << "Unknown Platform for AnakinEngine!";
#endif
}
}
};
} // namespace operators
} // namespace paddle
#endif // PADDLE_WITH_CUDA
...@@ -422,15 +422,6 @@ void BindAnalysisConfig(py::module *m) { ...@@ -422,15 +422,6 @@ void BindAnalysisConfig(py::module *m) {
py::arg("min_subgraph_size") = 3, py::arg("min_subgraph_size") = 3,
py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32, py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
py::arg("use_static") = false, py::arg("use_calib_mode") = true) py::arg("use_static") = false, py::arg("use_calib_mode") = true)
.def("enable_anakin_engine", &AnalysisConfig::EnableAnakinEngine,
py::arg("max_batch_size") = 1,
py::arg("max_input_shape") =
std::map<std::string, std::vector<int>>(),
py::arg("min_subgraph_size") = 6,
py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32,
py::arg("auto_config_layout") = false,
py::arg("passes_filter") = std::vector<std::string>(),
py::arg("ops_filter") = std::vector<std::string>())
.def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled) .def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled)
.def("switch_ir_debug", &AnalysisConfig::SwitchIrDebug, .def("switch_ir_debug", &AnalysisConfig::SwitchIrDebug,
py::arg("x") = true) py::arg("x") = true)
......