未验证 提交 42e62a74 编写于 作者: S sunsetlh 提交者: GitHub

[core] [XPU] add xpu conv2d fuse, vis fuse and many ops for wangpan clarity feature (#4084)

上级 64398557
......@@ -67,3 +67,7 @@ USE_MIR_PASS(__xpu__multi_encoder_fuse_pass);
USE_MIR_PASS(__xpu__embedding_with_eltwise_add_fuse_pass);
USE_MIR_PASS(__xpu__fc_fuse_pass);
USE_MIR_PASS(__xpu__mmdnn_fuse_pass);
USE_MIR_PASS(__xpu__conv2d_fuse_pass);
USE_MIR_PASS(__xpu__conv2d_link_previous_out_max_pass);
USE_MIR_PASS(__xpu__sfa_head_meanstd_fuse_pass);
USE_MIR_PASS(__xpu__sfa_head_moment_fuse_pass);
......@@ -30,6 +30,10 @@ lite_cc_library(mir_passes
fusion/__xpu__embedding_with_eltwise_add_fuse_pass.cc
fusion/__xpu__fc_fuse_pass.cc
fusion/__xpu__mmdnn_fuse_pass.cc
fusion/__xpu__conv2d_fuse_pass.cc
fusion/__xpu__conv2d_link_previous_out_max_pass.cc
fusion/__xpu__sfa_head_meanstd_fuse_pass.cc
fusion/__xpu__sfa_head_moment_fuse_pass.cc
fusion/match_matrix_activation_fuse_pass.cc
fusion/scales_fuse_pass.cc
fusion/sequence_reverse_embedding_fuse_pass.cc
......
此差异已折叠。
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include "lite/backends/xpu/math.h"
#include "lite/core/mir/pass_registry.h"
#include "lite/core/mir/pattern_matcher_high_api.h"
namespace paddle {
namespace lite {
namespace mir {
namespace fusion {
/* link the previous __xpu__conv2d's OutputMax to */
/* next __xpu__conv2d as InputMax */
/* For example: */
/* graph[1]: sub block */
/* in_Input */
/* in_Filter | in_FilterMax */
/* \ | / */
/* \ | / */
/* in_Bias ------- __xpu__conv2d */
/* | \ */
/* | \ */
/* out_Output out_OutputMax */
/* | */
/* | */
/* __xpu__conv2d */
/* | */
/* | */
/* out_Output */
/* */
/* After the pass is applied: */
/* in_Input */
/* in_Filter | in_FilterMax */
/* \ | / */
/* \ | / */
/* in_Bias ------- __xpu__conv2d */
/* | \ */
/* | \ */
/* out_Output out_OutputMax */
/* | / */
/* | / */
/* __xpu__conv2d */
/* | */
/* | */
/* out_Output */
class XPUConv2dLinkFuser : public FuseBase {
public:
explicit XPUConv2dLinkFuser(bool with_branch) : _with_branch(with_branch) {}
void BuildPattern() override {
auto* input = VarNode("input")
->assert_is_op_input("__xpu__conv2d", "Input")
->AsInput();
auto* filter = VarNode("filter")
->assert_is_op_input("__xpu__conv2d", "Filter")
->AsInput();
auto* filter_max = VarNode("filter_max")
->assert_is_op_input("__xpu__conv2d", "FilterMax")
->AsInput();
auto* bias =
VarNode("bias")->assert_is_op_input("__xpu__conv2d", "Bias")->AsInput();
auto* xpu_conv = OpNode("xpu_conv", "__xpu__conv2d");
auto* xpu_conv_out = VarNode("xpu_conv_out")
->assert_is_op_output("__xpu__conv2d", "Output")
->AsOutput();
auto* xpu_conv_out_max =
VarNode("xpu_conv_out_max")
->assert_is_op_output("__xpu__conv2d", "OutputMax")
->AsOutput();
*input >> *xpu_conv >> *xpu_conv_out;
*filter >> *xpu_conv;
*filter_max >> *xpu_conv;
*bias >> *xpu_conv;
*xpu_conv >> *xpu_conv_out_max;
if (_with_branch) {
auto* branch = VarNode("branch")
->assert_is_op_input("__xpu__conv2d", "Branch")
->AsInput();
*branch >> *xpu_conv;
}
}
void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) override {
auto conv_instruct = matched.at("xpu_conv")->stmt();
auto op_desc = *conv_instruct->mutable_op_info();
auto conv_old = conv_instruct->op();
// try to find input_max
std::string max_input_name = matched.at("input")->arg()->name + "_max";
auto* max_input_node = graph->RetrieveArgument(max_input_name);
if (max_input_node != nullptr &&
(!op_desc.HasAttr("has_input_max") ||
!op_desc.GetAttr<bool>("has_input_max"))) {
op_desc.SetInput("InputMax", {max_input_name});
op_desc.SetAttr("has_input_max", true);
conv_instruct->ResetOp(op_desc, conv_old->valid_places());
DirectedLink(max_input_node, matched.at("xpu_conv"));
}
}
private:
bool _with_branch;
};
} // namespace fusion
class XPUConv2dLinkPass : public ProgramPass {
public:
void Apply(const std::unique_ptr<SSAGraph>& graph) override {
if (GetBoolFromEnv("XPU_ENABLE_XTCL")) return;
fusion::XPUConv2dLinkFuser fuser1(true);
fuser1(graph.get());
// TODO(sunsetlh): need fix bug in no branch case
fusion::XPUConv2dLinkFuser fuser2(false);
fuser2(graph.get());
}
};
} // namespace mir
} // namespace lite
} // namespace paddle
// Registers the linker pass for XPU targets; it rewrites "__xpu__conv2d" ops.
REGISTER_MIR_PASS(__xpu__conv2d_link_previous_out_max_pass,
                  paddle::lite::mir::XPUConv2dLinkPass)
    .BindTargets({TARGET(kXPU)})
    .BindKernel("__xpu__conv2d");
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include "lite/backends/xpu/math.h"
#include "lite/core/mir/pass_registry.h"
#include "lite/core/mir/pattern_matcher_high_api.h"
namespace paddle {
namespace lite {
namespace mir {
namespace fusion {
// Special fuse pass for the subgraph block in vis clarity model
// block desc:
// [["reduce_mean",
// ["concat"],
// ["elementwise_sub",
// ["square", ["reduce_sum", ["scale", ["sqrt"]]]]]]]
// Fuses the mean/std head subgraph
//   reshape2 -> reduce_mean -> elementwise_sub -> square -> reduce_sum
//            -> elementwise_div(fill_constant) -> sqrt -> concat(mean, std)
// into a single "__xpu__sfa_head" op with op_type == "meanstd".
class XPUSfaHeadMeanstdFuser : public FuseBase {
 public:
  void BuildPattern() override {
    // Input: the reshape2 output that feeds reduce_mean (and the sub below).
    auto* reduce_mean_input = VarNode("reduce_mean_input")
                                  ->assert_is_op_output("reshape2", "Out")
                                  ->assert_is_op_input("reduce_mean", "X")
                                  ->AsInput();
    auto* reduce_mean = OpNode("reduce_mean", "reduce_mean")->AsIntermediate();
    // The mean is both concat input #0 and the subtrahend of the sub op.
    auto* reduce_mean_out = VarNode("reduce_mean_out")
                                ->assert_is_op_output("reduce_mean", "Out")
                                ->assert_is_op_nth_input("concat", "X", 0)
                                ->assert_is_op_input("elementwise_sub", "Y")
                                ->AsIntermediate();
    auto* elementwise_sub =
        OpNode("elementwise_sub", "elementwise_sub")->AsIntermediate();
    auto* elementwise_sub_out =
        VarNode("elementwise_sub_out")
            ->assert_is_op_output("elementwise_sub", "Out")
            ->assert_is_op_input("square", "X")
            ->AsIntermediate();
    // Variance chain: square -> reduce_sum -> divide by a constant -> sqrt.
    auto* square = OpNode("square", "square")->AsIntermediate();
    auto* square_out = VarNode("square_out")
                           ->assert_is_op_output("square", "Out")
                           ->assert_is_op_input("reduce_sum", "X")
                           ->AsIntermediate();
    auto* reduce_sum = OpNode("reduce_sum", "reduce_sum")->AsIntermediate();
    auto* reduce_sum_out = VarNode("reduce_sum_out")
                               ->assert_is_op_output("reduce_sum", "Out")
                               ->assert_is_op_input("elementwise_div", "X")
                               ->AsIntermediate();
    // fill_constant supplies the divisor (presumably the element count —
    // TODO(review): confirm against the exported model).
    auto* fill_constant =
        OpNode("fill_constant", "fill_constant")->AsIntermediate();
    auto* fill_constant_out = VarNode("fill_constant_out")
                                  ->assert_is_op_output("fill_constant", "Out")
                                  ->AsIntermediate();
    auto* elementwise_div =
        OpNode("elementwise_div", "elementwise_div")->AsIntermediate();
    auto* elementwise_div_out =
        VarNode("elementwise_div_out")
            ->assert_is_op_output("elementwise_div", "Out")
            ->assert_is_op_input("sqrt", "X")
            ->AsIntermediate();
    auto* sqrt = OpNode("sqrt", "sqrt")->AsIntermediate();
    // The std result is concat input #1.
    auto* sqrt_out = VarNode("sqrt_out")
                         ->assert_is_op_output("sqrt", "Out")
                         ->assert_is_op_nth_input("concat", "X", 1)
                         ->AsIntermediate();
    auto* concat = OpNode("concat", "concat")->AsIntermediate();
    auto* out =
        VarNode("out")->assert_is_op_output("concat", "Out")->AsOutput();
    // Multi-input ops take their inputs as a vector of pattern nodes.
    std::vector<PMNode*> elementwise_sub_inputs{reduce_mean_out,
                                                reduce_mean_input};
    std::vector<PMNode*> elementwise_div_inputs{reduce_sum_out,
                                                fill_constant_out};
    std::vector<PMNode*> concat_inputs{reduce_mean_out, sqrt_out};
    // Wire the pattern edges.
    *reduce_mean_input >> *reduce_mean >> *reduce_mean_out;
    elementwise_sub_inputs >> *elementwise_sub >> *elementwise_sub_out;
    *elementwise_sub_out >> *square >> *square_out;
    *square_out >> *reduce_sum >> *reduce_sum_out;
    *fill_constant >> *fill_constant_out;
    elementwise_div_inputs >> *elementwise_div >> *elementwise_div_out;
    *elementwise_div_out >> *sqrt >> *sqrt_out;
    concat_inputs >> *concat >> *out;
  }
  // Replaces the matched subgraph with one __xpu__sfa_head node connected
  // to the original input and output vars.
  void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) override {
    auto reduce_mean = matched.at("reduce_mean")->stmt()->op();
    auto* scope = reduce_mean->scope();
    auto op_desc = GenOpDesc(matched);
    auto vis_op = LiteOpRegistry::Global().Create("__xpu__sfa_head");
    auto& valid_places = reduce_mean->valid_places();
    vis_op->Attach(op_desc, scope);
    auto* new_op_node = graph->GraphCreateInstructNode(vis_op, valid_places);
    IR_NODE_LINK_TO(matched.at("reduce_mean_input"), new_op_node);
    IR_NODE_LINK_TO(new_op_node, matched.at("out"));
  }

 private:
  // Builds the fused op desc, reusing reduce_mean's desc as the template
  // (keeps its attrs) but replacing type, inputs and outputs.
  cpp::OpDesc GenOpDesc(const key2nodes_t& matched) {
    cpp::OpDesc op_desc = *matched.at("reduce_mean")->stmt()->op_info();
    op_desc.mutable_inputs()->clear();
    op_desc.mutable_outputs()->clear();
    op_desc.SetType("__xpu__sfa_head");
    op_desc.SetInput("Input", {matched.at("reduce_mean_input")->arg()->name});
    op_desc.SetOutput("Output", {matched.at("out")->arg()->name});
    // The kernel dispatches on this attribute ("meanstd" vs "moment").
    op_desc.SetAttr("op_type", std::string("meanstd"));
    return op_desc;
  }
};
} // namespace fusion
class XPUSfaHeadMeanstdFusePass : public ProgramPass {
public:
void Apply(const std::unique_ptr<SSAGraph>& graph) override {
if (GetBoolFromEnv("XPU_ENABLE_XTCL")) {
return;
}
fusion::XPUSfaHeadMeanstdFuser fuser;
fuser(graph.get());
}
};
} // namespace mir
} // namespace lite
} // namespace paddle
// Registers the mean/std head fuse pass for XPU targets.
REGISTER_MIR_PASS(__xpu__sfa_head_meanstd_fuse_pass,
                  paddle::lite::mir::XPUSfaHeadMeanstdFusePass)
    .BindTargets({TARGET(kXPU)})
    .BindKernel("reduce_mean");
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include "lite/backends/xpu/math.h"
#include "lite/core/mir/pass_registry.h"
#include "lite/core/mir/pattern_matcher_high_api.h"
namespace paddle {
namespace lite {
namespace mir {
namespace fusion {
// Special fuse pass for the subgraph block in vis clarity model
// block desc:
// [["reduce_mean",
// ["concat"],
// ["elementwise_sub",
// ["square", ["reduce_mean", ["sqrt"]]],
// ["abs", ["pow", ["elementwise_mul", ["reduce_mean", ["abs",
// ["pow"]]]]]],
// ["sign"],
// ["abs", ["pow", ["reduce_mean", ["abs", ["pow"]]]]]]]]
// Fuses the statistical-moment head subgraph of the vis clarity model
// (mean, std, and two higher-order moment branches, concatenated) into a
// single "__xpu__sfa_head" op with op_type == "moment".
class XPUSfaHeadMomentFuser : public FuseBase {
 public:
  void BuildPattern() override {
    // Shared input: reshape2 output feeding reduce_mean and elementwise_sub.
    auto* reduce_mean_input = VarNode("reduce_mean_input")
                                  ->assert_is_op_output("reshape2", "Out")
                                  ->assert_is_op_input("reduce_mean", "X")
                                  ->assert_is_op_input("elementwise_sub", "X")
                                  ->AsInput();
    auto* reduce_mean = OpNode("reduce_mean", "reduce_mean")->AsIntermediate();
    // Mean: concat input #0 and subtrahend of the centering sub.
    auto* reduce_mean_out = VarNode("reduce_mean_out")
                                ->assert_is_op_output("reduce_mean", "Out")
                                ->assert_is_op_nth_input("concat", "X", 0)
                                ->assert_is_op_input("elementwise_sub", "Y")
                                ->AsIntermediate();
    auto* elementwise_sub =
        OpNode("elementwise_sub", "elementwise_sub")->AsIntermediate();
    // The centered value fans out to square, abs, and sign branches.
    auto* elementwise_sub_out =
        VarNode("elementwise_sub_out")
            ->assert_is_op_output("elementwise_sub", "Out")
            ->assert_is_op_input("square", "X")
            ->assert_is_op_input("abs", "X")
            ->assert_is_op_input("sign", "X")
            ->AsIntermediate();
    // Branch 1 (std): square -> reduce_mean -> sqrt -> concat input #1.
    auto* square = OpNode("square", "square")->AsIntermediate();
    auto* square_out = VarNode("square_out")
                           ->assert_is_op_output("square", "Out")
                           ->assert_is_op_input("reduce_mean", "X")
                           ->AsIntermediate();
    auto* reduce_mean_es =
        OpNode("es_reduce_mean", "reduce_mean")->AsIntermediate();
    auto* reduce_mean_out_es = VarNode("reduce_mean_out_es")
                                   ->assert_is_op_output("reduce_mean", "Out")
                                   ->assert_is_op_input("sqrt", "X")
                                   ->AsIntermediate();
    auto* sqrt = OpNode("sqrt", "sqrt")->AsIntermediate();
    auto* sqrt_out = VarNode("sqrt_out")
                         ->assert_is_op_output("sqrt", "Out")
                         ->assert_is_op_nth_input("concat", "X", 1)
                         ->AsIntermediate();
    auto* concat = OpNode("concat", "concat")->AsIntermediate();
    auto* out =
        VarNode("out")->assert_is_op_output("concat", "Out")->AsOutput();
    // Branch 2 (signed moment): abs -> pow, multiplied by sign, averaged,
    // then abs -> pow again multiplied by sign -> concat input #2.
    auto* abs_e2 = OpNode("e2_abs", "abs")->AsIntermediate();
    auto* abs_e2_out = VarNode("abs_e2_out")
                           ->assert_is_op_input("pow", "X")
                           ->assert_is_op_output("abs", "Out")
                           ->AsIntermediate();
    auto* pow_e2 = OpNode("e2_pow", "pow")->AsIntermediate();
    auto* pow_e2_out = VarNode("pow_e2_out")
                           ->assert_is_op_input("elementwise_mul", "X")
                           ->assert_is_op_output("pow", "Out")
                           ->AsIntermediate();
    auto* sign_e3 = OpNode("e3_sign", "sign")->AsIntermediate();
    auto* sign_e3_out = VarNode("sign_e3_out")
                            ->assert_is_op_input("elementwise_mul", "Y")
                            ->assert_is_op_output("sign", "Out")
                            ->AsIntermediate();
    auto* elementwise_mul_top =
        OpNode("elementwise_mul_top", "elementwise_mul")->AsIntermediate();
    auto* elementwise_mul_top_out =
        VarNode("elementwise_mul_top_out")
            ->assert_is_op_input("reduce_mean", "X")
            ->assert_is_op_output("elementwise_mul", "Out")
            ->AsIntermediate();
    auto* reduce_mean_e2 =
        OpNode("reduce_mean_e2", "reduce_mean")->AsIntermediate();
    auto* reduce_mean_e2_out = VarNode("reduce_mean_e2_out")
                                   ->assert_is_op_input("abs", "X")
                                   ->assert_is_op_input("sign", "X")
                                   ->assert_is_op_output("reduce_mean", "Out")
                                   ->AsIntermediate();
    auto* abs_e2_2 = OpNode("abs_e2_2", "abs")->AsIntermediate();
    auto* abs_e2_2_out = VarNode("abs_e2_2_out")
                             ->assert_is_op_input("pow", "X")
                             ->assert_is_op_output("abs", "Out")
                             ->AsIntermediate();
    auto* pow_e2_2 = OpNode("pow_e2_2", "pow")->AsIntermediate();
    auto* pow_e2_2_out = VarNode("pow_e2_2_out")
                             ->assert_is_op_nth_input("elementwise_mul", "X", 0)
                             ->assert_is_op_output("pow", "Out")
                             ->AsIntermediate();
    auto* sign_e3_2 = OpNode("sign_e3_2", "sign")->AsIntermediate();
    auto* sign_e3_2_out = VarNode("sign_e3_2_out")
                              ->assert_is_op_input("elementwise_mul", "Y")
                              ->assert_is_op_output("sign", "Out")
                              ->AsIntermediate();
    auto* elementwise_mul_bottom =
        OpNode("elementwise_mul_bottom", "elementwise_mul")->AsIntermediate();
    auto* elementwise_mul_bottom_out =
        VarNode("elementwise_mul_bottom_out")
            ->assert_is_op_output("elementwise_mul", "Out")
            ->assert_is_op_nth_input("concat", "X", 2)
            ->AsIntermediate();
    // Branch 3 (e4): abs -> pow -> reduce_mean -> abs -> pow -> concat #3.
    auto* abs_e_4 = OpNode("abs_e_4", "abs")->AsIntermediate();
    auto* abs_e_4_out = VarNode("abs_e_4_out")
                            ->assert_is_op_output("abs", "Out")
                            ->assert_is_op_input("pow", "X")
                            ->AsIntermediate();
    auto* pow_e_4 = OpNode("pow_e_4", "pow")->AsIntermediate();
    auto* pow_e_4_out = VarNode("pow_e_4_out")
                            ->assert_is_op_output("pow", "Out")
                            ->assert_is_op_input("reduce_mean", "X")
                            ->AsIntermediate();
    // NOTE(review): unlike every other OpNode here, no op-type argument is
    // given, so this node is not constrained to "reduce_mean" — confirm
    // whether OpNode("reduce_mean_4", "reduce_mean") was intended.
    auto* reduce_mean_4 = OpNode("reduce_mean_4")->AsIntermediate();
    auto* reduce_mean_4_out = VarNode("reduce_mean_4_out")
                                  ->assert_is_op_output("reduce_mean", "Out")
                                  ->assert_is_op_input("abs", "X")
                                  ->AsIntermediate();
    auto* abs_e_4_2 = OpNode("abs_e_4_2", "abs")->AsIntermediate();
    auto* abs_e_4_2_out = VarNode("abs_e_4_2_out")
                              ->assert_is_op_output("abs", "Out")
                              ->assert_is_op_input("pow", "X")
                              ->AsIntermediate();
    auto* pow_e_4_2 = OpNode("pow_e_4_2", "pow")->AsIntermediate();
    auto* pow_e_4_2_out = VarNode("pow_e_4_2_out")
                              ->assert_is_op_output("pow", "Out")
                              ->assert_is_op_nth_input("concat", "X", 3)
                              ->AsIntermediate();
    std::vector<PMNode*> elementwise_sub_inputs{reduce_mean_input,
                                                reduce_mean_out};
    // Wire the pattern edges for all four branches.
    *reduce_mean_input >> *reduce_mean >> *reduce_mean_out;
    elementwise_sub_inputs >> *elementwise_sub >> *elementwise_sub_out;
    *elementwise_sub_out >> *square >> *square_out;
    *square_out >> *reduce_mean_es >> *reduce_mean_out_es;
    *reduce_mean_out_es >> *sqrt >> *sqrt_out;
    *elementwise_sub_out >> *sign_e3 >> *sign_e3_out;
    std::vector<PMNode*> elementwise_mul_top_inputs{pow_e2_out, sign_e3_out};
    *elementwise_sub_out >> *abs_e2 >> *abs_e2_out;
    *abs_e2_out >> *pow_e2 >> *pow_e2_out;
    elementwise_mul_top_inputs >> *elementwise_mul_top >>
        *elementwise_mul_top_out;
    *elementwise_mul_top_out >> *reduce_mean_e2 >> *reduce_mean_e2_out;
    *reduce_mean_e2_out >> *abs_e2_2 >> *abs_e2_2_out;
    *abs_e2_2_out >> *pow_e2_2 >> *pow_e2_2_out;
    *reduce_mean_e2_out >> *sign_e3_2 >> *sign_e3_2_out;
    std::vector<PMNode*> elementwise_mul_bottom_inputs{pow_e2_2_out,
                                                       sign_e3_2_out};
    elementwise_mul_bottom_inputs >> *elementwise_mul_bottom >>
        *elementwise_mul_bottom_out;
    *elementwise_sub_out >> *abs_e_4 >> *abs_e_4_out;
    *abs_e_4_out >> *pow_e_4 >> *pow_e_4_out;
    *pow_e_4_out >> *reduce_mean_4 >> *reduce_mean_4_out;
    *reduce_mean_4_out >> *abs_e_4_2 >> *abs_e_4_2_out;
    *abs_e_4_2_out >> *pow_e_4_2 >> *pow_e_4_2_out;
    std::vector<PMNode*> concat_inputs{
        reduce_mean_out, sqrt_out, elementwise_mul_bottom_out, pow_e_4_2_out};
    concat_inputs >> *concat >> *out;
  }
  // Replaces the matched subgraph with one __xpu__sfa_head node connected
  // to the original input and output vars.
  void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) override {
    auto reduce_mean = matched.at("reduce_mean")->stmt()->op();
    auto* scope = reduce_mean->scope();
    auto op_desc = GenOpDesc(matched);
    auto vis_op = LiteOpRegistry::Global().Create("__xpu__sfa_head");
    auto& valid_places = reduce_mean->valid_places();
    vis_op->Attach(op_desc, scope);
    auto* new_op_node = graph->GraphCreateInstructNode(vis_op, valid_places);
    IR_NODE_LINK_TO(matched.at("reduce_mean_input"), new_op_node);
    IR_NODE_LINK_TO(new_op_node, matched.at("out"));
  }

 private:
  // Builds the fused op desc, reusing reduce_mean's desc as the template
  // (keeps its attrs) but replacing type, inputs and outputs.
  cpp::OpDesc GenOpDesc(const key2nodes_t& matched) {
    cpp::OpDesc op_desc = *matched.at("reduce_mean")->stmt()->op_info();
    op_desc.mutable_inputs()->clear();
    op_desc.mutable_outputs()->clear();
    op_desc.SetType("__xpu__sfa_head");
    op_desc.SetInput("Input", {matched.at("reduce_mean_input")->arg()->name});
    op_desc.SetOutput("Output", {matched.at("out")->arg()->name});
    // The kernel dispatches on this attribute ("meanstd" vs "moment").
    op_desc.SetAttr("op_type", std::string("moment"));
    return op_desc;
  }
};
} // namespace fusion
class XPUSfaHeadMomentFusePass : public ProgramPass {
public:
void Apply(const std::unique_ptr<SSAGraph>& graph) override {
if (GetBoolFromEnv("XPU_ENABLE_XTCL")) {
return;
}
fusion::XPUSfaHeadMomentFuser fuser;
fuser(graph.get());
}
};
} // namespace mir
} // namespace lite
} // namespace paddle
// Registers the moment head fuse pass for XPU targets.
REGISTER_MIR_PASS(__xpu__sfa_head_moment_fuse_pass,
                  paddle::lite::mir::XPUSfaHeadMomentFusePass)
    .BindTargets({TARGET(kXPU)})
    .BindKernel("reduce_mean");
......@@ -122,7 +122,15 @@ std::string Visualize(mir::SSAGraph* graph) {
dot.AddNode(var_name, {});
exists_var_names.insert(var_name);
}
dot.AddEdge(var_name, op_name, {});
std::vector<Dot::Attr> attrs;
std::string arg_name;
if (op_info->GetInputArgname(var_name, &arg_name)) {
attrs.emplace_back("label", arg_name);
} else {
VLOG(5) << "Can not find the input argument for var " << var_name
<< " in " << op_type;
}
dot.AddEdge(var_name, op_name, attrs);
}
for (auto& x : node->outlinks) {
std::string var_name;
......@@ -136,7 +144,15 @@ std::string Visualize(mir::SSAGraph* graph) {
dot.AddNode(var_name, {});
exists_var_names.insert(var_name);
}
dot.AddEdge(op_name, var_name, {});
std::vector<Dot::Attr> attrs;
std::string arg_name;
if (op_info->GetOutputArgname(var_name, &arg_name)) {
attrs.emplace_back("label", arg_name);
} else {
VLOG(5) << "Can not find the output argument for var " << var_name
<< " in " << op_type;
}
dot.AddEdge(op_name, var_name, attrs);
}
// Output its all of attributes(name and values)
os << "* " << op_name << "\n";
......
......@@ -109,6 +109,10 @@ class Optimizer {
"identity_dropout_eliminate_pass",
"__xpu__resnet_fuse_pass",
"__xpu__resnet_cbam_fuse_pass",
"__xpu__conv2d_fuse_pass",
"__xpu__conv2d_link_previous_out_max_pass",
"__xpu__sfa_head_meanstd_fuse_pass",
"__xpu__sfa_head_moment_fuse_pass",
"__xpu__mmdnn_fuse_pass",
"__xpu__multi_encoder_fuse_pass",
"__xpu__embedding_with_eltwise_add_fuse_pass",
......
......@@ -90,8 +90,6 @@ add_kernel(sequence_softmax_compute_arm ARM extra SRCS sequence_softmax_compute.
add_kernel(topk_compute_arm ARM extra SRCS topk_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(increment_compute_arm ARM extra SRCS increment_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(beam_search_compute_arm ARM extra SRCS beam_search_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(fill_constant_compute_arm ARM basic SRCS fill_constant_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(fill_constant_batch_size_like_compute_arm ARM basic SRCS fill_constant_batch_size_like_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(lod_reset_compute_arm ARM extra SRCS lod_reset_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(lstm_arm ARM extra SRCS lstm_compute.cc DEPS ${lite_kernel_deps} math_arm)
......
......@@ -8,6 +8,8 @@ add_kernel(unsqueeze_compute_host Host basic SRCS unsqueeze_compute.cc DEPS ${li
add_kernel(multiclass_nms_compute_host Host basic SRCS multiclass_nms_compute.cc DEPS ${lite_kernel_deps})
add_kernel(expand_compute_host Host basic SRCS expand_compute.cc DEPS ${lite_kernel_deps})
add_kernel(expand_as_compute_host Host basic SRCS expand_as_compute.cc DEPS ${lite_kernel_deps})
add_kernel(fill_constant_compute_host Host basic SRCS fill_constant_compute.cc DEPS ${lite_kernel_deps})
add_kernel(fill_constant_batch_size_like_compute_host Host basic SRCS fill_constant_batch_size_like_compute.cc DEPS ${lite_kernel_deps})
add_kernel(shape_compute_host Host extra SRCS shape_compute.cc DEPS ${lite_kernel_deps})
add_kernel(is_empty_compute_host Host extra SRCS is_empty_compute.cc DEPS ${lite_kernel_deps})
add_kernel(crf_decoding_compute_host Host extra SRCS crf_decoding_compute.cc DEPS ${lite_kernel_deps})
......
......@@ -12,16 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/arm/fill_constant_batch_size_like_compute.h"
#include "lite/kernels/host/fill_constant_batch_size_like_compute.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
namespace host {
void FillConstantBatchSizeLikeCompute::Run() {
auto& param = *param_.get_mutable<param_t>();
auto& context = ctx_->As<ARMContext>();
if (param.dtype == static_cast<int32_t>(lite::core::FluidType::FP32)) {
auto data = param.out->template mutable_data<float>();
......@@ -50,18 +49,18 @@ void FillConstantBatchSizeLikeCompute::Run() {
}
}
} // namespace arm
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(
fill_constant_batch_size_like,
kARM,
kHost,
kAny,
kNCHW,
paddle::lite::kernels::arm::FillConstantBatchSizeLikeCompute,
paddle::lite::kernels::host::FillConstantBatchSizeLikeCompute,
def)
.BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
.BindInput("Input", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny))})
.Finalize();
......@@ -19,10 +19,10 @@
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
namespace host {
class FillConstantBatchSizeLikeCompute
: public KernelLite<TARGET(kARM), PRECISION(kAny)> {
: public KernelLite<TARGET(kHost), PRECISION(kAny)> {
public:
using param_t = operators::FillConstantBatchSizeLikeParam;
......@@ -31,7 +31,7 @@ class FillConstantBatchSizeLikeCompute
~FillConstantBatchSizeLikeCompute() {}
};
} // namespace arm
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
......@@ -12,16 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/arm/fill_constant_compute.h"
#include "lite/kernels/host/fill_constant_compute.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
namespace host {
void FillConstantCompute::Run() {
auto& param = *param_.get_mutable<param_t>();
auto& context = ctx_->As<ARMContext>();
if (param.dtype == static_cast<int32_t>(lite::core::FluidType::FP32)) {
auto data = param.out->template mutable_data<float>();
......@@ -50,21 +49,21 @@ void FillConstantCompute::Run() {
}
}
} // namespace arm
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
// float
REGISTER_LITE_KERNEL(fill_constant,
kARM,
kHost,
kAny,
kNCHW,
paddle::lite::kernels::arm::FillConstantCompute,
paddle::lite::kernels::host::FillConstantCompute,
def)
.BindInput("ShapeTensor",
{LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
{LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))})
.BindInput("ShapeTensorList",
{LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny))})
{LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny))})
.Finalize();
......@@ -19,9 +19,9 @@
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
namespace host {
class FillConstantCompute : public KernelLite<TARGET(kARM), PRECISION(kAny)> {
class FillConstantCompute : public KernelLite<TARGET(kHost), PRECISION(kAny)> {
public:
using param_t = operators::FillConstantParam;
......@@ -30,7 +30,7 @@ class FillConstantCompute : public KernelLite<TARGET(kARM), PRECISION(kAny)> {
~FillConstantCompute() {}
};
} // namespace arm
} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
......@@ -24,6 +24,9 @@ else()
add_kernel(sequence_topk_avg_pooling_compute_xpu XPU basic SRCS sequence_topk_avg_pooling_compute.cc DEPS ${lite_kernel_deps})
add_kernel(concat_compute_xpu XPU basic SRCS concat_compute.cc DEPS ${lite_kernel_deps})
add_kernel(search_fc_compute_xpu XPU basic SRCS search_fc_compute.cc DEPS ${lite_kernel_deps})
add_kernel(reshape_compute_xpu XPU basic SRCS reshape_compute.cc DEPS ${lite_kernel_deps})
add_kernel(reduce_mean_compute_xpu XPU basic SRCS reduce_mean_compute.cc DEPS ${lite_kernel_deps})
add_kernel(reduce_sum_compute_xpu XPU basic SRCS reduce_sum_compute.cc DEPS ${lite_kernel_deps})
# extra
add_kernel(lookup_table_compute_xpu XPU extra SRCS lookup_table_compute.cc DEPS ${lite_kernel_deps})
......@@ -44,4 +47,6 @@ else()
add_kernel(__xpu__fc_compute_xpu XPU extra SRCS __xpu__fc_compute.cc DEPS ${lite_kernel_deps})
add_kernel(__xpu__search_attention_compute_xpu XPU extra SRCS __xpu__search_attention_compute.cc DEPS ${lite_kernel_deps})
add_kernel(__xpu__mmdnn_compute_xpu XPU extra SRCS __xpu__mmdnn_compute.cc DEPS ${lite_kernel_deps})
add_kernel(__xpu__conv2d_compute_xpu XPU extra SRCS __xpu__conv2d_compute.cc DEPS ${lite_kernel_deps})
add_kernel(__xpu__sfa_head_compute_xpu XPU extra SRCS __xpu__sfa_head_compute.cc DEPS ${lite_kernel_deps})
endif()
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/__xpu__conv2d_compute.h"
#include <string>
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Executes the fused conv2d (+ bias / branch / activation) on the XPU via
// xdnn's int16 conv kernel. Currently restricted (see CHECKs below) to
// RELU activation, group == 1, and int16-quantized filters.
void XPUConv2dCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();
  auto& input_dims = param.Input->dims();
  auto& filter_dims = param.Filter->dims();
  // Dims are indexed as NCHW, as the variable names below indicate.
  int batch = static_cast<int>(input_dims[0]);
  int img_c = static_cast<int>(input_dims[1]);
  int img_h = static_cast<int>(input_dims[2]);
  int img_w = static_cast<int>(input_dims[3]);
  int filter_num = static_cast<int>(filter_dims[0]);
  int win_h = static_cast<int>(filter_dims[2]);
  int win_w = static_cast<int>(filter_dims[3]);
  auto paddings = *param.paddings;
  auto dilations = *param.dilations;
  int stride_h = param.strides[0];
  int stride_w = param.strides[1];
  // Assumes paddings/dilations hold symmetric {h, w} pairs; a 4-element
  // {top, bottom, left, right} padding would be misread — TODO confirm the
  // op normalizes paddings before this point.
  int paddings_h = paddings[0];
  int paddings_w = paddings[1];
  int dilations_h = dilations[0];
  int dilations_w = dilations[1];
  std::string filter_type = param.filter_type;
  int groups = param.groups;
  int act_type = (param.act_type == -1) ? xdnn::Activation_t::RELU
                                        : param.act_type;  // -1 means not init
  // Bias / Branch / InputMax are optional inputs; pass nullptr when absent.
  const auto* bias = param.Bias ? param.Bias->data<float>() : nullptr;
  const auto* branch = param.Branch ? param.Branch->data<float>() : nullptr;
  const float* input_max =
      param.InputMax ? param.InputMax->data<float>() : nullptr;
  float* output_max = param.OutputMax
                          ? param.OutputMax->mutable_data<float>(TARGET(kXPU))
                          : nullptr;
  float* output = param.Output->mutable_data<float>(TARGET(kXPU));
  // TODO(luohang): now support for resnet50 first
  CHECK_EQ(act_type, xdnn::Activation_t::RELU);
  CHECK_EQ(groups, 1);
  CHECK_EQ(filter_type, "int16");
  xdnn::Activation_t act((xdnn::Activation_t::act_enum)act_type);
  // Positional xdnn call; the trailing comments document each argument.
  int r = xdnn::conv2d_forward_int16<float, int16_t, float, float>(
      ctx.GetRawContext(),        /* context */
      batch,                      /* batch */
      img_c,                      /* input_c */
      img_h,                      /* input_h */
      img_w,                      /* input_w */
      filter_num,                 /* num_filter */
      win_h,                      /* kernel_h */
      win_w,                      /* kernel_w */
      stride_h,                   /* stride_h */
      stride_w,                   /* stride_w */
      paddings_h,                 /* pad_h */
      paddings_w,                 /* pad_w */
      dilations_h,                /* dilation_h */
      dilations_w,                /* dilation_w */
      groups,                     /* group */
      param.Input->data<float>(), /* input bottom */
      param.Filter->data<int16_t>(), /* filter weight */
      output,                     /* output top */
      bias,                       /* bias */
      branch,                     /* branch */
      act,                        /* act type */
      input_max,                  /* max_image_ptr */
      param.FilterMax->data<float>(), /* max_filter_ptr */
      output_max /* max_result_ptr */);
  CHECK_EQ(r, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Registers the fused conv2d kernel; all tensors live on the XPU device.
REGISTER_LITE_KERNEL(__xpu__conv2d,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::XPUConv2dCompute,
                     def)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("InputMax", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("FilterMax", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Branch", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("OutputMax", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Kernel for the fused "__xpu__conv2d" op (conv + bias/branch/activation).
class XPUConv2dCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::XPUConv2dParam;
  // Runs the fused convolution on the XPU device.
  virtual void Run();
  virtual ~XPUConv2dCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/__xpu__sfa_head_compute.h"
#include <string>
#include "lite/backends/xpu/xpu_header_sitter.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Dispatches the vis feature head to the matching xdnn primitive.
// Input is expected to be 3-D (batch, m, n) -- enforced by
// XPUSfaHeadOp::CheckShape() for the supported op types.
void XPUSfaHeadCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();

  const std::string& vis_type = param.op_type;
  auto input = param.input;
  const int batch = static_cast<int>(input->dims()[0]);
  const int m = static_cast<int>(input->dims()[1]);
  const int n = static_cast<int>(input->dims()[2]);

  if (vis_type == "meanstd") {
    int r = xdnn::vis_meanstd(ctx.GetRawContext(),
                              param.input->data<float>(),
                              param.output->mutable_data<float>(TARGET(kXPU)),
                              batch,
                              m,
                              n);
    CHECK_EQ(r, 0) << "XPU kernel error";
    // Fixed: removed a stray `(void)param.output->mutable_data<float>();`
    // here. Without TARGET(kXPU) it re-requested the buffer with the default
    // (host) target after the XPU kernel had already written the result.
  } else if (vis_type == "moment") {
    int r = xdnn::vis_moment(ctx.GetRawContext(),
                             param.input->data<float>(),
                             param.output->mutable_data<float>(TARGET(kXPU)),
                             batch,
                             m,
                             n);
    CHECK_EQ(r, 0) << "XPU kernel error";
  } else {
    LOG(FATAL) << "vis xpu op not supported type " << vis_type;
  }
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Register the vis feature-head kernel on XPU (float precision, NCHW layout).
REGISTER_LITE_KERNEL(__xpu__sfa_head,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::XPUSfaHeadCompute,
                     def)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Kernel for the __xpu__sfa_head op: computes per-channel statistics
// ("meanstd" or "moment", selected via param.op_type) on the XPU target.
class XPUSfaHeadCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::XPUSfaHeadParam;
  // Dispatches to xdnn::vis_meanstd / xdnn::vis_moment; defined in the .cc.
  virtual void Run();
  virtual ~XPUSfaHeadCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
......@@ -60,6 +60,71 @@ void SigmoidCompute::Run() {
CHECK_EQ(r, 0);
}
// Element-wise absolute value on the XPU: Out = |X|.
void AbsCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();

  const float* x_data = param.X->data<float>();
  float* y_data = param.Out->mutable_data<float>(TARGET(kXPU));

  int ret = xdnn::activation_forward(ctx.GetRawContext(),
                                     xdnn::Activation_t::ABS,
                                     param.X->numel(),
                                     x_data,
                                     y_data);
  CHECK_EQ(ret, 0);
}
// Element-wise square on the XPU: Out = X * X.
void SquareCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();

  const float* x_data = param.X->data<float>();
  float* y_data = param.Out->mutable_data<float>(TARGET(kXPU));

  int ret = xdnn::activation_forward(ctx.GetRawContext(),
                                     xdnn::Activation_t::SQUARE,
                                     param.X->numel(),
                                     x_data,
                                     y_data);
  CHECK_EQ(ret, 0);
}
// Element-wise square root on the XPU: Out = sqrt(X).
void SqrtCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();

  const float* x_data = param.X->data<float>();
  float* y_data = param.Out->mutable_data<float>(TARGET(kXPU));

  int ret = xdnn::activation_forward(ctx.GetRawContext(),
                                     xdnn::Activation_t::SQRT,
                                     param.X->numel(),
                                     x_data,
                                     y_data);
  CHECK_EQ(ret, 0);
}
// Element-wise power on the XPU via the ACT_POW activation.
// NOTE(review): param.factor (the "factor" attribute parsed by PowOp) is not
// forwarded to xdnn here -- presumably Activation_t::ACT_POW uses a default
// exponent. Confirm against the xdnn API; otherwise only the default factor
// is actually computed.
void PowCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();

  int r = xdnn::activation_forward(
      ctx.GetRawContext(), /* context */
      xdnn::Activation_t::ACT_POW, /* type */
      param.X->numel(), /* len */
      param.X->data<float>(), /* x */
      param.Out->mutable_data<float>(TARGET(kXPU)) /* y */);
  CHECK_EQ(r, 0);
}
// Element-wise sign on the XPU: Out = sign(X).
void SignCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();

  const float* x_data = param.X->data<float>();
  float* y_data = param.Out->mutable_data<float>(TARGET(kXPU));

  int ret = xdnn::activation_forward(ctx.GetRawContext(),
                                     xdnn::Activation_t::SIGN,
                                     param.X->numel(),
                                     x_data,
                                     y_data);
  CHECK_EQ(ret, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
......@@ -86,3 +151,33 @@ REGISTER_LITE_KERNEL(sigmoid,
.BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
.Finalize();
// Register the element-wise activation kernels on XPU (float, NCHW).
REGISTER_LITE_KERNEL(
    abs, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::AbsCompute, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
REGISTER_LITE_KERNEL(
    square, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::SquareCompute, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
REGISTER_LITE_KERNEL(
    sqrt, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::SqrtCompute, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
REGISTER_LITE_KERNEL(
    pow, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::PowCompute, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
REGISTER_LITE_KERNEL(
    sign, kXPU, kFloat, kNCHW, paddle::lite::kernels::xpu::SignCompute, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
......@@ -48,6 +48,51 @@ class SigmoidCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
virtual ~SigmoidCompute() = default;
};
// Element-wise |x| on XPU.
class AbsCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::ActivationParam;
  virtual void Run();
  virtual ~AbsCompute() = default;
};
// Element-wise x^2 on XPU.
class SquareCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::ActivationParam;
  virtual void Run();
  virtual ~SquareCompute() = default;
};
// Element-wise sqrt(x) on XPU.
class SqrtCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::ActivationParam;
  virtual void Run();
  virtual ~SqrtCompute() = default;
};
// Element-wise power on XPU (see PowCompute::Run for factor handling).
class PowCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::ActivationParam;
  virtual void Run();
  virtual ~PowCompute() = default;
};
// Element-wise sign(x) on XPU.
class SignCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::ActivationParam;
  virtual void Run();
  virtual ~SignCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
......
......@@ -76,6 +76,59 @@ void ElementwiseSubCompute::Run() {
}
}
// Element-wise division with broadcast: X is viewed as `iter` consecutive
// slices of Y->numel() elements, and each slice is divided by Y.
// When axis == -1 it is normalized to rank(X) - rank(Y), i.e. Y matches the
// trailing dimensions of X.
void ElementwiseDivCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();
  auto& x_dims = param.X->dims().data();
  auto& y_dims = param.Y->dims();
  int axis = param.axis;
  if (param.axis == -1) {
    axis = x_dims.size() - y_dims.size();
  }
  int iter = std::accumulate(
      x_dims.begin(), x_dims.begin() + axis, 1, std::multiplies<int>());
  int stride = param.Y->numel();
  // Hoist the loop-invariant base pointers: the original re-fetched
  // data()/mutable_data() on every iteration.
  const float* x_base = param.X->data<float>();
  const float* y_ptr = param.Y->data<float>();
  float* o_base = param.Out->mutable_data<float>(TARGET(kXPU));
  for (int i = 0; i < iter; ++i) {
    int r = xdnn::elementwise_div(ctx.GetRawContext(), /* context */
                                  x_base + i * stride, /* x */
                                  y_ptr,               /* y */
                                  o_base + i * stride, /* z */
                                  stride /* len */);
    CHECK_EQ(r, 0);
  }
}
// Element-wise multiplication with broadcast: X is viewed as `iter`
// consecutive slices of Y->numel() elements, each multiplied by Y.
// When axis == -1 it is normalized to rank(X) - rank(Y), i.e. Y matches the
// trailing dimensions of X.
void ElementwiseMulCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();
  auto& x_dims = param.X->dims().data();
  auto& y_dims = param.Y->dims();
  int axis = param.axis;
  if (param.axis == -1) {
    axis = x_dims.size() - y_dims.size();
  }
  int iter = std::accumulate(
      x_dims.begin(), x_dims.begin() + axis, 1, std::multiplies<int>());
  int stride = param.Y->numel();
  // Hoist the loop-invariant base pointers: the original re-fetched
  // data()/mutable_data() on every iteration.
  const float* x_base = param.X->data<float>();
  const float* y_ptr = param.Y->data<float>();
  float* o_base = param.Out->mutable_data<float>(TARGET(kXPU));
  for (int i = 0; i < iter; ++i) {
    int r = xdnn::elementwise_mul(ctx.GetRawContext(), /* context */
                                  x_base + i * stride, /* x */
                                  y_ptr,               /* y */
                                  o_base + i * stride, /* z */
                                  stride /* len */);
    CHECK_EQ(r, 0);
  }
}
} // namespace xpu
} // namespace kernels
} // namespace lite
......@@ -102,3 +155,25 @@ REGISTER_LITE_KERNEL(elementwise_sub,
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
.Finalize();
// Register the broadcasting element-wise div/mul kernels on XPU (float, NCHW).
REGISTER_LITE_KERNEL(elementwise_div,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::ElementwiseDivCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
REGISTER_LITE_KERNEL(elementwise_mul,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::ElementwiseMulCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
......@@ -41,6 +41,26 @@ class ElementwiseSubCompute
virtual ~ElementwiseSubCompute() = default;
};
// Broadcasting element-wise division on XPU (Out = X / Y).
class ElementwiseDivCompute
    : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::ElementwiseParam;
  virtual void Run();
  virtual ~ElementwiseDivCompute() = default;
};
// Broadcasting element-wise multiplication on XPU (Out = X * Y).
class ElementwiseMulCompute
    : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::ElementwiseParam;
  virtual void Run();
  virtual ~ElementwiseMulCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/reduce_mean_compute.h"
#include <vector>
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Mean-reduction over the axes listed in param.dim, executed by xdnn.
void ReduceMeanCompute::Run() {
  auto& param = Param<operators::ReduceMeanParam>();
  auto& ctx = this->ctx_->As<XPUContext>();

  const float* in_ptr = param.X->data<float>();
  float* out_ptr = param.Out->mutable_data<float>(TARGET(kXPU));

  // xdnn wants the shape as a plain int array.
  const auto& in_shape = param.X->dims();
  const int rank = in_shape.size();
  std::vector<int> shape_vec(rank);
  for (int i = 0; i < rank; ++i) {
    shape_vec[i] = in_shape[i];
  }

  auto reduce_axes = param.dim;
  int ret = xdnn::reduce(ctx.GetRawContext(),
                         in_ptr,
                         out_ptr,
                         shape_vec.data(),
                         rank,
                         reduce_axes.data(),
                         reduce_axes.size(),
                         xdnn::ReduceOp::REDUCE_MEAN);
  CHECK_EQ(ret, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Register the reduce_mean kernel on XPU (float, NCHW).
REGISTER_LITE_KERNEL(reduce_mean,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::ReduceMeanCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Mean-reduction over selected axes on XPU (see the .cc for the xdnn call).
class ReduceMeanCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  void Run() override;
  virtual ~ReduceMeanCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/reduce_sum_compute.h"
#include <vector>
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Sum-reduction: either over the whole tensor (reduce_all) or over the axes
// listed in param.dim.
void ReduceSumCompute::Run() {
  auto& param = Param<operators::ReduceParam>();
  auto& ctx = this->ctx_->As<XPUContext>();

  const float* in_ptr = param.x->data<float>();
  float* out_ptr = param.output->mutable_data<float>(TARGET(kXPU));

  if (param.reduce_all) {
    // Collapse every axis in a single full-tensor summation.
    int ret = xdnn::sum(ctx.GetRawContext(), in_ptr, out_ptr, param.x->numel());
    CHECK_EQ(ret, 0);
    return;
  }

  // Partial reduction: hand xdnn the shape as a plain int array plus the
  // axes to reduce.
  const auto& in_shape = param.x->dims();
  const int rank = in_shape.size();
  std::vector<int> shape_vec(rank);
  for (int i = 0; i < rank; ++i) {
    shape_vec[i] = in_shape[i];
  }
  auto reduce_axes = param.dim;
  int ret = xdnn::reduce(ctx.GetRawContext(),
                         in_ptr,
                         out_ptr,
                         shape_vec.data(),
                         rank,
                         reduce_axes.data(),
                         reduce_axes.size(),
                         xdnn::ReduceOp::REDUCE_SUM);
  CHECK_EQ(ret, 0);
}
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Register the reduce_sum kernel on XPU (float, NCHW).
REGISTER_LITE_KERNEL(reduce_sum,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::ReduceSumCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Sum-reduction (full-tensor or per-axis) on XPU.
class ReduceSumCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  void Run() override;
  virtual ~ReduceSumCompute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/xpu/reshape_compute.h"
#include "lite/core/op_registry.h"
// Register the zero-copy reshape2 kernel on XPU (float, NCHW).
REGISTER_LITE_KERNEL(reshape2,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::Reshape2Compute<float>,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("ShapeTensor", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Shape", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("XShape", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "lite/core/kernel.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
// Zero-copy reshape2 on XPU: Out shares X's device buffer; only the dims
// metadata differs. XShape records X's original dims as int64 values.
template <typename T>
class Reshape2Compute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  using param_t = operators::ReshapeParam;
  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
    auto x = param.x;
    auto output = param.output;
    auto xshape = param.xshape;
    auto x_dims = x->dims();
    auto x_dims_data = x_dims.Vectorize();
    // Snapshot the already-inferred output dims, then restore them after
    // ShareDataWith (which presumably also copies X's dims -- hence the
    // Resize immediately after; confirm against Tensor::ShareDataWith).
    auto out_dims = output->dims();
    output->ShareDataWith(*x);
    output->Resize(out_dims);
    // Copy X's original shape into the XShape tensor on the device.
    // NOTE(review): assumes xshape was already resized to hold
    // x_dims.size() int64 values by the op's InferShape -- confirm.
    auto* xshape_data = xshape->mutable_data<int64_t>(TARGET(kXPU));
    TargetWrapperXPU::MemcpySync(xshape_data,
                                 x_dims_data.data(),
                                 x_dims.size() * sizeof(int64_t),
                                 IoDirection::HtoD);
  }
  virtual ~Reshape2Compute() = default;
};
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
......@@ -53,6 +53,8 @@ add_operator(instance_norm_op basic SRCS instance_norm_op.cc DEPS ${op_DEPS})
add_operator(subgraph_op basic SRCS subgraph_op.cc DEPS ${op_DEPS})
add_operator(grid_sampler_op basic SRCS grid_sampler_op.cc DEPS ${op_DEPS})
add_operator(flatten_op basic SRCS flatten_op.cc DEPS ${op_DEPS})
add_operator(pow_op extra SRCS pow_op.cc DEPS ${op_DEPS})
add_operator(sign_op extra SRCS sign_op.cc DEPS ${op_DEPS})
# 2.basic ops not used in basic models
add_operator(negative_op extra SRCS negative_op.cc DEPS ${op_DEPS})
......@@ -177,6 +179,9 @@ add_operator(__xpu__fc_op extra SRCS __xpu__fc_op.cc DEPS ${op_DEPS})
add_operator(__xpu__resnet_cbam_op extra SRCS __xpu__resnet_cbam_op.cc DEPS ${op_DEPS})
add_operator(__xpu__search_attention_op extra SRCS __xpu__search_attention_op.cc DEPS ${op_DEPS})
add_operator(__xpu__mmdnn_op extra SRCS __xpu__mmdnn_op.cc DEPS ${op_DEPS})
add_operator(__xpu__conv2d_op extra SRCS __xpu__conv2d_op.cc DEPS ${op_DEPS})
add_operator(__xpu__sfa_head_op extra SRCS __xpu__sfa_head_op.cc DEPS ${op_DEPS})
if (NOT LITE_WITH_X86)
lite_cc_test(test_one_hot_op SRCS one_hot_op_test.cc DEPS one_hot_op memory scope ${op_deps} one_hot_compute_host)
lite_cc_test(test_fc_op SRCS fc_op_test.cc
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/__xpu__conv2d_op.h"
#include <memory>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/operators/conv_op.h"
namespace paddle {
namespace lite {
namespace operators {
std::string padding_algorithm_ = ""; // NOLINT
// Validates the presence and ranks of the fused conv's tensors.
// Bias, Branch and InputMax are optional and not checked here.
bool XPUConv2dOp::CheckShape() const {
  // Fixed: the three CHECK messages were shuffled (Output reported as
  // "Input(Filter)", Filter as "Output(Output)"); each now names the tensor
  // it actually checks.
  CHECK(param_.Input) << "Input(Input) of ConvXPUOp should not be null.";
  CHECK(param_.Filter) << "Input(Filter) of ConvXPUOp should not be null.";
  CHECK(param_.Output) << "Output(Output) of ConvXPUOp should not be null.";

  const auto in_dims = param_.Input->dims();
  const auto filter_dims = param_.Filter->dims();
  int groups = param_.groups;

  CHECK_EQ(in_dims.size(), 4UL) << "Conv input should be 4-D tensor.";
  CHECK_EQ(in_dims.size(), filter_dims.size())
      << "Conv input dimension and filter dimension should be the same.";
  CHECK_EQ(in_dims.size() - param_.strides.size(), 2U)
      << "Conv input dimension and strides dimension should be consistent.";
  CHECK_EQ(filter_dims.size(), 4UL) << "Conv filter should be 4-D tensor.";
  CHECK_EQ(in_dims[1], filter_dims[1] * groups)
      << "The number of input channels should be equal to filter channels * "
         "groups.";
  CHECK_EQ(filter_dims[0] % groups, 0)
      << "The number of output channels should be divided by groups.";
  return true;
}
// copy from conv_op.cc
// Spatial output size of a convolution along one dimension:
//   out = (in + pad_left + pad_right - effective_kernel) / stride + 1
// where effective_kernel accounts for dilation gaps between filter taps.
inline int ConvOutputSize(int input_size,
                          int filter_size,
                          int dilation,
                          int pad_left,
                          int pad_right,
                          int stride) {
  const int effective_kernel = dilation * (filter_size - 1) + 1;
  const int padded_input = input_size + pad_left + pad_right;
  return (padded_input - effective_kernel) / stride + 1;
}
// copy from conv_op.cc
// Infers Output's NCHW shape from Input/Filter dims, strides, (possibly
// algorithm-adjusted) paddings and dilations; also sizes OutputMax.
bool XPUConv2dOp::InferShapeImpl() const {
  const auto in_dims = param_.Input->dims();
  const auto filter_dims = param_.Filter->dims();
  // NOTE(review): padding_algorithm_ is a file-scope global written by the
  // last AttachImpl() call on ANY XPUConv2dOp instance -- if two fused convs
  // use different padding algorithms the wrong one may be applied here.
  operators::UpdatePaddingAndDilation(param_.paddings.get(),
                                      param_.dilations.get(),
                                      param_.strides,
                                      padding_algorithm_,
                                      in_dims,
                                      filter_dims);
  // Output shape starts as (batch, out_channels); spatial dims follow.
  std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
  auto paddings = *param_.paddings;
  auto dilations = *param_.dilations;
  for (size_t i = 0; i < param_.strides.size(); ++i) {
    output_shape.push_back(ConvOutputSize(in_dims[i + 2],
                                          filter_dims[i + 2],
                                          dilations[i],
                                          paddings[i * 2],
                                          paddings[i * 2 + 1],
                                          param_.strides[i]));
  }
  // Set output and output max dims
  // (OutputMax is fixed at 4 floats -- presumably the XPU per-tensor max
  // buffer layout; confirm against the xdnn conv API.)
  param_.Output->Resize(lite::DDim(output_shape));
  param_.OutputMax->Resize({4});
  // share LoD
  param_.Output->set_lod(param_.Input->lod());
  return true;
}
// Binds tensors from the scope and parses attributes into param_.
// Required: Input, Filter, FilterMax, Output, OutputMax.
// Optional: Bias, Branch, InputMax (gated by has_input_max).
bool XPUConv2dOp::AttachImpl(const cpp::OpDesc& op_desc, lite::Scope* scope) {
  AttachParam(&param_);
  CHECK(scope->FindVar(op_desc.Input("Input").front()));
  CHECK(scope->FindVar(op_desc.Input("Filter").front()));
  CHECK(scope->FindVar(op_desc.Input("FilterMax").front()));
  CHECK(scope->FindVar(op_desc.Output("Output").front()));
  CHECK(scope->FindVar(op_desc.Output("OutputMax").front()));

  param_.Input =
      scope->FindVar(op_desc.Input("Input").front())->GetMutable<Tensor>();
  param_.Filter =
      scope->FindVar(op_desc.Input("Filter").front())->GetMutable<Tensor>();
  param_.FilterMax =
      scope->FindVar(op_desc.Input("FilterMax").front())->GetMutable<Tensor>();

  // Optional inputs. Fixed: the original called
  // op_desc.Input("Bias").front() unconditionally, which is undefined
  // behavior when the fused op carries no bias; guard it like Branch.
  std::vector<std::string> input_arg_names = op_desc.InputArgumentNames();
  if (std::find(input_arg_names.begin(), input_arg_names.end(), "Bias") !=
      input_arg_names.end()) {
    auto bias_args = op_desc.Input("Bias");
    if (!bias_args.empty()) {
      auto* bias = scope->FindVar(bias_args.front());
      if (bias != nullptr) {
        param_.Bias = bias->GetMutable<Tensor>();
      }
    }
  }
  if (std::find(input_arg_names.begin(), input_arg_names.end(), "Branch") !=
      input_arg_names.end()) {
    auto arguments = op_desc.Input("Branch");
    if (arguments.size() > 0) {
      auto arg_var = scope->FindVar(arguments.front());
      if (arg_var != nullptr) {
        param_.Branch =
            const_cast<lite::Tensor*>(&(arg_var->Get<lite::Tensor>()));
      }
    }
  }

  param_.Output =
      scope->FindVar(op_desc.Output("Output").front())->GetMutable<Tensor>();
  param_.OutputMax =
      scope->FindVar(op_desc.Output("OutputMax").front())->GetMutable<Tensor>();

  param_.strides = op_desc.GetAttr<std::vector<int>>("strides");
  auto paddings = op_desc.GetAttr<std::vector<int>>("paddings");
  auto dilations = op_desc.GetAttr<std::vector<int>>("dilations");
  param_.dilations = std::make_shared<std::vector<int>>(dilations);
  param_.groups = op_desc.GetAttr<int>("groups");
  if (op_desc.HasAttr("act_type")) {
    param_.act_type = op_desc.GetAttr<int>("act_type");
  }
  if (op_desc.HasAttr("filter_type")) {
    param_.filter_type = op_desc.GetAttr<std::string>("filter_type");
  } else {
    param_.filter_type = "int16";  // default filter quantization
  }
  if (op_desc.HasAttr("has_input_max") &&
      op_desc.GetAttr<bool>("has_input_max")) {
    CHECK(scope->FindVar(op_desc.Input("InputMax").front()));
    param_.InputMax =
        scope->FindVar(op_desc.Input("InputMax").front())->GetMutable<Tensor>();
  }
  // NOTE(review): padding_algorithm_ is file-scope state shared by every
  // XPUConv2dOp instance; InferShapeImpl() later reads whatever the last
  // AttachImpl stored. It should be a member of XPUConv2dParam.
  if (op_desc.HasAttr("padding_algorithm")) {
    padding_algorithm_ = op_desc.GetAttr<std::string>("padding_algorithm");
  }
  // Expand 2-value paddings [pad_h, pad_w] to the 4-value form
  // [top, bottom, left, right] expected downstream.
  if (paddings.size() == 2L) {
    for (size_t i = 0; i < param_.strides.size(); ++i) {
      int copy_pad = *(paddings.begin() + 2 * i);
      paddings.insert(paddings.begin() + 2 * i + 1, copy_pad);
    }
  } else {
    if (paddings.size() != 4L) {
      LOG(FATAL)
          << "Paddings size should be the same or twice as the input size.";
    }
  }
  param_.paddings = std::make_shared<std::vector<int>>(paddings);
  return true;
}
} // namespace operators
} // namespace lite
} // namespace paddle
REGISTER_LITE_OP(__xpu__conv2d, paddle::lite::operators::XPUConv2dOp);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "lite/core/op_lite.h"
namespace paddle {
namespace lite {
namespace operators {
// Fused conv2d op for XPU: conv2d with optional bias, residual branch and
// activation, plus the int16-quantized filter and max-value side tensors.
class XPUConv2dOp : public OpLite {
 public:
  XPUConv2dOp() {}
  explicit XPUConv2dOp(const std::string &op_type) : OpLite(op_type) {}
  bool CheckShape() const override;
  bool InferShapeImpl() const override;
  bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
  void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
  std::string DebugString() const override { return "XPUConv2d"; }
 private:
  mutable XPUConv2dParam param_;
};
} // namespace operators
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/__xpu__sfa_head_op.h"
#include <vector>
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace operators {
// Verifies tensors are bound, op_type is set, and -- for the supported
// statistics heads -- that the input is a 3-D tensor.
bool XPUSfaHeadOp::CheckShape() const {
  CHECK_OR_FALSE(param_.input);
  CHECK_OR_FALSE(param_.output);
  CHECK_OR_FALSE(param_.op_type != "");

  const auto input_dims = param_.input->dims();
  const bool is_known_head =
      (param_.op_type == "meanstd") || (param_.op_type == "moment");
  if (is_known_head) {
    CHECK_EQ_OR_FALSE(input_dims.size(), 3UL);
  }
  return true;
}
bool XPUSfaHeadOp::InferShapeImpl() const {
const auto& input_dims = param_.input->dims();
auto op_type = param_.op_type;
// Set output dims
std::vector<DDim::value_type> output_dims(2);
output_dims[0] = input_dims[0];
if (op_type == "meanstd") {
output_dims[1] = 2 * input_dims[1];
} else if (op_type == "moment") {
output_dims[1] = 4 * input_dims[1];
} else {
LOG(FATAL) << "not supported vis op --> " << op_type;
}
param_.output->Resize(output_dims);
// share LoD
param_.output->set_lod(param_.input->lod());
return true;
}
// Binds the Input/Output tensors from the scope and reads the op_type attr.
bool XPUSfaHeadOp::AttachImpl(const cpp::OpDesc& op_desc, lite::Scope* scope) {
  const auto& in_name = op_desc.Input("Input").front();
  const auto& out_name = op_desc.Output("Output").front();
  auto* in_var = scope->FindVar(in_name);
  auto* out_var = scope->FindVar(out_name);
  CHECK(in_var);
  CHECK(out_var);
  param_.input = in_var->GetMutable<lite::Tensor>();
  param_.output = out_var->GetMutable<lite::Tensor>();
  param_.op_type = op_desc.GetAttr<std::string>("op_type");
  return true;
}
} // namespace operators
} // namespace lite
} // namespace paddle
REGISTER_LITE_OP(__xpu__sfa_head, paddle::lite::operators::XPUSfaHeadOp);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "lite/core/op_lite.h"
namespace paddle {
namespace lite {
namespace operators {
// Op definition for the __xpu__sfa_head statistics head ("meanstd"/"moment").
class XPUSfaHeadOp : public OpLite {
 public:
  XPUSfaHeadOp() {}
  explicit XPUSfaHeadOp(const std::string &op_type) : OpLite(op_type) {}
  bool CheckShape() const override;
  bool InferShapeImpl() const override;
  bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
  void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
  // Fixed: previously reported "XPUFc" (copy-pasted from the fc op).
  std::string DebugString() const override { return "XPUSfaHead"; }
 private:
  mutable XPUSfaHeadParam param_;
};
} // namespace operators
} // namespace lite
} // namespace paddle
......@@ -232,6 +232,20 @@ struct PowerParam : ParamBase {
float power{};
};
// Parameters for the pow op: Out = X ^ factor, element-wise.
struct PowParam : ParamBase {
  const lite::Tensor* X{};  // input tensor
  lite::Tensor* Out{};      // output tensor, same shape as X
  float factor{1.};         // exponent attribute; defaults to 1
};
// Parameters for the sign op: Out = sign(X), element-wise.
struct SignParam : ParamBase {
  const lite::Tensor* X{};  // input tensor
  lite::Tensor* Out{};      // output tensor, same shape as X
};
struct ShuffleChannelParam : ParamBase {
const lite::Tensor* X{};
lite::Tensor* Out{};
......@@ -1810,6 +1824,31 @@ struct XPUMmdnnMergeAllParam : ParamBase {
lite::Tensor* out{};
};
// Parameters for the fused __xpu__conv2d op.
struct XPUConv2dParam : ParamBase {
  lite::Tensor* Input{nullptr};      // conv input, NCHW
  lite::Tensor* Filter{nullptr};     // quantized filter (see filter_type)
  lite::Tensor* InputMax{nullptr};   // optional input quantization max
  lite::Tensor* FilterMax{nullptr};  // filter quantization max
  lite::Tensor* Bias{nullptr};       // optional fused bias
  lite::Tensor* Branch{nullptr};     // optional fused residual branch
  lite::Tensor* Output{nullptr};     // conv output
  lite::Tensor* OutputMax{nullptr};  // output max, linkable to the next conv

  int groups{1};
  // Fused activation id; -1 presumably means "no activation" -- confirm
  // against the xdnn conv API.
  int act_type{-1};
  std::string filter_type{""};  // e.g. "int16" (default set in AttachImpl)
  std::vector<int> strides;
  // 4-element paddings [top, bottom, left, right] after AttachImpl expansion.
  std::shared_ptr<std::vector<int>> paddings;
  std::shared_ptr<std::vector<int>> dilations;
};

// Parameters for the __xpu__sfa_head statistics-head op.
struct XPUSfaHeadParam : ParamBase {
  lite::Tensor* input{nullptr};
  lite::Tensor* output{nullptr};
  std::string op_type{""};  // "meanstd" or "moment"
};
// For DeformableConvolution op
struct DeformableConvParam : ParamBase {
lite::Tensor* x{};
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/pow_op.h"
#include "lite/core/op_lite.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
namespace paddle {
namespace lite {
namespace operators {
// Validate that both the input and output tensors are attached; returns
// false (via CHECK_OR_FALSE) instead of aborting when one is missing.
bool PowOp::CheckShape() const {
  CHECK_OR_FALSE(param_.X);
  CHECK_OR_FALSE(param_.Out);
  return true;
}
// pow is element-wise, so the output shape simply mirrors the input shape.
bool PowOp::InferShapeImpl() const {
  param_.Out->Resize(param_.X->dims());
  return true;
}
// Resolve the variables named by the op description, wire their tensors
// into param_, and read the "factor" attribute (the exponent).
bool PowOp::AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) {
  const auto x_name = op_desc.Input("X").front();
  const auto out_name = op_desc.Output("Out").front();
  auto *x_var = scope->FindVar(x_name);
  auto *out_var = scope->FindVar(out_name);
  CHECK(x_var);
  CHECK(out_var);
  param_.factor = op_desc.GetAttr<float>("factor");
  param_.X = x_var->GetMutable<lite::Tensor>();
  param_.Out = out_var->GetMutable<lite::Tensor>();
  CHECK(param_.X);
  CHECK(param_.Out);
  return true;
}
} /* namespace operators */
} /* namespace lite */
} /* namespace paddle */
// Register the operator under the type string "pow" so graph loading can
// instantiate it.
REGISTER_LITE_OP(pow, paddle::lite::operators::PowOp);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "lite/core/op_lite.h"
#include "lite/core/scope.h"
#include "lite/utils/all.h"
namespace paddle {
namespace lite {
namespace operators {
// Operator wrapper for element-wise pow: Out = X ^ factor.
class PowOp : public OpLite {
 public:
  PowOp() {}

  explicit PowOp(const std::string &op_type) : OpLite(op_type) {}

  // Checks that X and Out are both attached.
  bool CheckShape() const override;

  // Out takes the same shape as X (element-wise op).
  bool InferShapeImpl() const override;

  // Binds X/Out tensors and the "factor" attribute into param_.
  bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;

  // Forwards the populated param struct to the selected kernel.
  void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }

  std::string DebugString() const override { return "pow"; }

#ifdef LITE_WITH_PROFILE
  // Profiler hook: record tensor shapes and an op count of one per output
  // element.
  void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
    ch->input_shape = ch->DimToStr(param_.X->dims());
    ch->output_shape = ch->DimToStr(param_.Out->dims());
    ch->macs = param_.Out->numel();
  }
#endif

 private:
  mutable PowParam param_;
};
} /* namespace operators */
} /* namespace lite */
} /* namespace paddle */
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/sign_op.h"
#include "lite/core/op_lite.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
namespace paddle {
namespace lite {
namespace operators {
// Validate that both the input and output tensors are attached; returns
// false (via CHECK_OR_FALSE) instead of aborting when one is missing.
bool SignOp::CheckShape() const {
  CHECK_OR_FALSE(param_.X);
  CHECK_OR_FALSE(param_.Out);
  return true;
}
// sign is element-wise, so the output shape simply mirrors the input shape.
bool SignOp::InferShapeImpl() const {
  param_.Out->Resize(param_.X->dims());
  return true;
}
// Resolve the variables named by the op description and wire their tensors
// into param_; sign has no attributes to read.
bool SignOp::AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) {
  const auto x_name = op_desc.Input("X").front();
  const auto out_name = op_desc.Output("Out").front();
  auto *x_var = scope->FindVar(x_name);
  auto *out_var = scope->FindVar(out_name);
  CHECK(x_var);
  CHECK(out_var);
  param_.X = x_var->GetMutable<lite::Tensor>();
  param_.Out = out_var->GetMutable<lite::Tensor>();
  CHECK(param_.X);
  CHECK(param_.Out);
  return true;
}
} /* namespace operators */
} /* namespace lite */
} /* namespace paddle */
// Register the operator under the type string "sign" so graph loading can
// instantiate it.
REGISTER_LITE_OP(sign, paddle::lite::operators::SignOp);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "lite/core/op_lite.h"
#include "lite/core/scope.h"
#include "lite/utils/all.h"
namespace paddle {
namespace lite {
namespace operators {
// Operator wrapper for element-wise sign of the input tensor.
class SignOp : public OpLite {
 public:
  SignOp() {}

  explicit SignOp(const std::string &op_type) : OpLite(op_type) {}

  // Checks that X and Out are both attached.
  bool CheckShape() const override;

  // Out takes the same shape as X (element-wise op).
  bool InferShapeImpl() const override;

  // Binds the X/Out tensors into param_; sign has no attributes.
  bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;

  // Forwards the populated param struct to the selected kernel.
  void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }

  std::string DebugString() const override { return "sign"; }

#ifdef LITE_WITH_PROFILE
  // Profiler hook: record tensor shapes and an op count of one per output
  // element.
  void GetOpRuntimeInfo(paddle::lite::profile::OpCharacter *ch) {
    ch->input_shape = ch->DimToStr(param_.X->dims());
    ch->output_shape = ch->DimToStr(param_.Out->dims());
    ch->macs = param_.Out->numel();
  }
#endif

 private:
  mutable SignParam param_;
};
} /* namespace lite */
} /* namespace paddle */
......@@ -135,8 +135,8 @@ TEST(fill_constant_batch_size_like, precision) {
#if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2; // use fp16 in npu
#elif defined(LITE_WITH_ARM)
place = TARGET(kARM);
#elif defined(LITE_WITH_ARM) || defined(LITE_WITH_X86)
place = TARGET(kHost);
#else
return;
#endif
......
......@@ -174,8 +174,8 @@ TEST(fill_constant, precision) {
#if defined(LITE_WITH_NPU)
place = TARGET(kNPU);
abs_error = 1e-2; // use fp16 in npu
#elif defined(LITE_WITH_ARM)
place = TARGET(kARM);
#elif defined(LITE_WITH_ARM) || defined(LITE_WITH_X86)
place = TARGET(kHost);
#else
return;
#endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册