Commit dfe0c436 authored by W weihaoji

[XPU] add res2net fusion

test=develop, test=xpu
Parent 0348583f
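For context, Res2Net replaces the single 3x3 convolution inside a ResNet bottleneck with a hierarchy of smaller convolutions over channel splits; the fusion added in this commit collapses an entire Res2Net50 backbone built from such blocks into one XPU op. A minimal sketch of the block's connectivity follows (illustrative only; the names and the identity stand-in for the convolution are not Paddle-Lite code):

#include <cstddef>
#include <vector>

// Stand-in for a 3x3 convolution over one channel split (identity here,
// purely to keep the sketch self-contained).
static std::vector<float> conv3x3(const std::vector<float>& x) { return x; }

// Res2Net connectivity: the channels are split into groups; each group after
// the first is convolved, and from the third group on, the previous group's
// output is added to the input first (hierarchical residual connections).
std::vector<std::vector<float>> Res2NetBlock(
    const std::vector<std::vector<float>>& splits) {
  std::vector<std::vector<float>> outs(splits.size());
  outs[0] = splits[0];  // first split passes through untouched
  for (std::size_t i = 1; i < splits.size(); ++i) {
    std::vector<float> in = splits[i];
    if (i > 1) {
      for (std::size_t j = 0; j < in.size(); ++j) in[j] += outs[i - 1][j];
    }
    outs[i] = conv3x3(in);
  }
  return outs;  // the caller concatenates the groups along the channel axis
}

int main() {
  // Toy example: four channel splits of four values each.
  std::vector<std::vector<float>> splits(4, std::vector<float>(4, 1.f));
  auto outs = Res2NetBlock(splits);
  return outs.size() == 4 ? 0 : 1;
}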
@@ -62,6 +62,7 @@ USE_MIR_PASS(quantized_op_attributes_inference_pass);
USE_MIR_PASS(control_flow_op_unused_inputs_and_outputs_eliminate_pass)
USE_MIR_PASS(lite_scale_activation_fuse_pass);
USE_MIR_PASS(__xpu__resnet_fuse_pass);
USE_MIR_PASS(__xpu__res2net_fuse_pass);
USE_MIR_PASS(__xpu__resnet_d_fuse_pass);
USE_MIR_PASS(__xpu__resnet_cbam_fuse_pass);
USE_MIR_PASS(__xpu__multi_encoder_fuse_pass);
...
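USE_MIR_PASS exists so that binaries which link only the registry header still pull in the pass's translation unit. A sketch of the usual force-linkage idiom behind such macros (hypothetical; the exact Paddle-Lite definition may differ):

// Dummy registrar-side symbol so this sketch links on its own; in the real
// library it would live next to the pass's static registration object.
int mir_pass_registry_demo_pass_touch() { return 0; }

// Referencing an extern symbol from the pass's translation unit keeps the
// linker from stripping the static registrar.
#define USE_MIR_PASS_SKETCH(name__)                  \
  extern int mir_pass_registry_##name__##_touch();   \
  static int mir_pass_usage_##name__ =               \
      mir_pass_registry_##name__##_touch();

USE_MIR_PASS_SKETCH(demo_pass);

int main() { return mir_pass_usage_demo_pass; }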
@@ -17,6 +17,9 @@
#pragma GCC system_header
#include <xpu/api.h>
#include <xpu/golden.h>
#include <xpu/refactor/fusion.h>
#include <xpu/refactor/math.h>
#include <xpu/refactor/nn.h>
#include <xpu/runtime.h>
#if defined(LITE_WITH_XTCL)
...
@@ -25,6 +25,7 @@ lite_cc_library(mir_passes
    fusion/scale_activation_fuse_pass.cc
    fusion/reshape_fuse_pass.cc
    fusion/__xpu__resnet_fuse_pass.cc
    fusion/__xpu__res2net_fuse_pass.cc
    fusion/__xpu__resnet_cbam_fuse_pass.cc
    fusion/__xpu__multi_encoder_fuse_pass.cc
    fusion/__xpu__embedding_with_eltwise_add_fuse_pass.cc
...
This diff has been collapsed.
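The collapsed diff is presumably the new fusion/__xpu__res2net_fuse_pass.cc added to the CMake list above; its contents are not visible here. By analogy with the existing __xpu__resnet_fuse_pass, a Paddle-Lite MIR fusion pass has roughly the following shape (a sketch under that assumption, not the actual file; the class name and matcher comment are guesses):

// Assumes Paddle-Lite's pass headers; the real matcher logic is far more
// involved than this skeleton.
#include <memory>
#include "lite/core/mir/pass.h"
#include "lite/core/mir/pass_registry.h"

namespace paddle {
namespace lite {
namespace mir {

class XPURes2NetFusePass : public ProgramPass {
 public:
  void Apply(const std::unique_ptr<SSAGraph>& graph) override {
    // Match the conv/batch-norm/elementwise subgraphs that make up a
    // Res2Net50 backbone and replace them with a single __xpu__res2net50 op
    // whose Filter/Bias/MaxFilter inputs feed the kernel registered below.
  }
};

}  // namespace mir
}  // namespace lite
}  // namespace paddle

REGISTER_MIR_PASS(__xpu__res2net_fuse_pass,
                  paddle::lite::mir::XPURes2NetFusePass)
    .BindTargets({TARGET(kXPU)});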
@@ -109,6 +109,7 @@ class Optimizer {
        "identity_dropout_eliminate_pass",
        "__xpu__resnet_fuse_pass",
        "__xpu__resnet_d_fuse_pass",
        "__xpu__res2net_fuse_pass",
        "__xpu__resnet_cbam_fuse_pass",
        "__xpu__conv2d_fuse_pass",
        "__xpu__conv2d_link_previous_out_max_pass",
...
@@ -49,6 +49,21 @@ void XPUResNet50DtypeCompute::PrepareForRun() {
  }
}
void XPURes2Net50Compute::PrepareForRun() {
  auto& param = this->Param<param_t>();
  // The filter tensors hold int16-quantized weights in float storage, so the
  // raw pointer is reinterpreted rather than converted.
  for (auto* filter : param.filter) {
    arg_filter_.push_back(
        reinterpret_cast<const int16_t*>(filter->data<float>()));
  }
  for (auto* bias : param.bias) {
    arg_bias_.push_back(bias->data<float>());
  }
  for (auto* max_filter : param.max_filter) {
    arg_max_filter_.push_back(max_filter->data<float>());
  }
}
void XPUResNet50Compute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();
@@ -81,6 +96,22 @@ void XPUResNet50DtypeCompute::Run() {
  CHECK_EQ(r, 0);
}
void XPURes2Net50Compute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->As<XPUContext>();
  int batch_size = param.input->dims()[0];
  // A single fused XDNN call executes the whole Res2Net50 backbone.
  int r = xdnn::conv2d_int16_res2net<float, int16_t>(
      ctx.GetRawContext(),                              /* context */
      batch_size,                                       /* num */
      param.input->data<float>(),                       /* bottom */
      &arg_filter_[0],                                  /* weight_list */
      param.output->mutable_data<float>(TARGET(kXPU)),  /* top */
      &arg_bias_[0],                                    /* bias_list */
      &arg_max_filter_[0]                               /* max_filter_list */);
  CHECK_EQ(r, 0);
}
}  // namespace xpu
}  // namespace kernels
}  // namespace lite
@@ -111,3 +142,16 @@ REGISTER_LITE_KERNEL(__xpu__resnet50_d,
    .BindInput("MaxFilter", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
REGISTER_LITE_KERNEL(__xpu__res2net50,
                     kXPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::xpu::XPURes2Net50Compute,
                     def)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindInput("MaxFilter", {LiteType::GetTensorTy(TARGET(kXPU))})
    .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kXPU))})
    .Finalize();
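The MaxFilter input bound above carries the per-filter float maxima that pair with the int16 weights reinterpreted in PrepareForRun. A sketch of the symmetric int16 scheme such XPU kernels typically assume (an assumption about the offline quantizer, which may differ; the function name is illustrative):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical offline quantization matching the kernel's expectations:
// scale weights by 32767 / max|w| and keep the float max so the runtime can
// rescale int16 accumulations back to float.
void QuantizeFilterInt16(const std::vector<float>& w,
                         std::vector<int16_t>* q,
                         float* max_w) {
  *max_w = 0.f;
  for (float v : w) *max_w = std::max(*max_w, std::fabs(v));
  const float scale = (*max_w > 0.f) ? 32767.f / *max_w : 0.f;
  q->resize(w.size());
  for (std::size_t i = 0; i < w.size(); ++i) {
    (*q)[i] = static_cast<int16_t>(std::lround(w[i] * scale));
  }
}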
@@ -53,6 +53,20 @@ class XPUResNet50DtypeCompute
  std::vector<const float *> arg_bias_;
};
class XPURes2Net50Compute
    : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
 public:
  // The fused res2net op exposes the same inputs as __xpu__resnet50, so the
  // existing XPUResNet50Param is reused as-is.
  using param_t = operators::XPUResNet50Param;

  virtual void PrepareForRun();

  virtual void Run();

 private:
  std::vector<const int16_t *> arg_filter_;
  std::vector<const float *> arg_max_filter_;
  std::vector<const float *> arg_bias_;
};
}  // namespace xpu
}  // namespace kernels
}  // namespace lite
...
@@ -63,3 +63,4 @@ bool XPUResNet50Op::AttachImpl(const cpp::OpDesc& op_desc, lite::Scope* scope) {
REGISTER_LITE_OP(__xpu__resnet50, paddle::lite::operators::XPUResNet50Op);
REGISTER_LITE_OP(__xpu__resnet50_d, paddle::lite::operators::XPUResNet50Op);
REGISTER_LITE_OP(__xpu__res2net50, paddle::lite::operators::XPUResNet50Op);
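With the pass, kernel, and op all registered, the fusion applies automatically whenever a model is optimized for XPU. A minimal usage sketch assuming the standard Paddle-Lite C++ API (the model directory is hypothetical):

#include "lite/api/paddle_api.h"

int main() {
  paddle::lite_api::CxxConfig config;
  config.set_model_dir("res2net50_model");  // hypothetical model directory
  config.set_valid_places({
      paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)},
      paddle::lite_api::Place{TARGET(kHost), PRECISION(kFloat)},
  });
  // The optimizer pipeline shown above runs here; on an XPU build it can now
  // rewrite a Res2Net50 subgraph into a single __xpu__res2net50 op.
  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
  (void)predictor;
  return 0;
}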