提交 7421f560 编写于 作者: Y yangfei

imp fusion_conv_add_prelu and fusion_conv_add_add_prelu op

上级 a1a7b05b
......@@ -18,10 +18,10 @@ limitations under the License. */
#include "operators/math/conv_func.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
template <typename Dtype, typename T>
void FusionConvAddAddPReluOp<Dtype, T>::InferShape() const {
template <typename Dtype, typename T>
void FusionConvAddAddPReluOp<Dtype, T>::InferShape() const {
auto in_dims = this->param_.Input()->dims();
auto filter_dims = this->param_.Filter()->dims();
const std::vector<int> &strides = this->param_.Strides();
......@@ -42,9 +42,9 @@ namespace paddle_mobile {
}
framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim);
}
}
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
......
......@@ -24,24 +24,24 @@ limitations under the License. */
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
class FusionConvAddAddPReluOpMatcher : public framework::FusionOpMatcher {
class FusionConvAddAddPReluOpMatcher : public framework::FusionOpMatcher {
public:
FusionConvAddAddPReluOpMatcher() {
node_ = framework::Node(G_OP_TYPE_CONV);
node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD)
> std::make_shared<framework::Node>(G_OP_TYPE_PRELU);
std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
std::make_shared<framework::Node>(G_OP_TYPE_PRELU);
}
void FolderNodes(
framework::Node *node,
std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
node->Folder(node_.Depth(), Type(),
{{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}, {"Out", "addOut"},{"X", "addX"}}},
{G_OP_TYPE_PRELU, {{"Alpha", "Alpha"}}}
},
{{G_OP_TYPE_ELEMENTWISE_ADD,
{{"Y", "Y"}, {"Out", "addOut"}, {"X", "addX"}}},
{G_OP_TYPE_PRELU, {{"Alpha", "Alpha"}}}},
removed_nodes);
}
......@@ -51,10 +51,11 @@ namespace paddle_mobile {
DLOG << " conv add add prelu check add X ";
return {{2, "Y"}, {2, "X"}};
}
};
};
template <typename DeviceType, typename T>
class FusionConvAddAddPReluOp : public framework::OperatorWithKernel<
template <typename DeviceType, typename T>
class FusionConvAddAddPReluOp
: public framework::OperatorWithKernel<
DeviceType, FusionConvAddAddPReluParam<DeviceType>,
operators::ConvAddAddPReluKernel<DeviceType, T>> {
public:
......@@ -64,21 +65,22 @@ namespace paddle_mobile {
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<
DeviceType, FusionConvAddAddPReluParam<DeviceType>,
operators::ConvAddAddPReluKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {}
operators::ConvAddAddPReluKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FusionConvAddAddPReluParam<DeviceType>,
operators::ConvAddAddPReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
};
};
#ifdef PADDLE_MOBILE_CPU
#ifndef CONV_ADD_ADD_PRELU_REGISTER
#define CONV_ADD_ADD_PRELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar(
static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar(
new FusionConvAddAddPReluOpMatcher());
#endif
......@@ -87,7 +89,7 @@ namespace paddle_mobile {
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifndef CONV_ADD_ADD_PRELU_REGISTER
#ifndef CONV_ADD_ADD_PRELU_REGISTER
#define CONV_ADD_ADD_PRELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar(
new FusionConvAddAddPReluOpMatcher());
......@@ -95,7 +97,7 @@ static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar(
#endif
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
#ifdef PADDLE_MOBILE_CPU
......
......@@ -18,10 +18,10 @@ limitations under the License. */
#include "operators/math/conv_func.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
template <typename Dtype, typename T>
void FusionConvAddPReluOp<Dtype, T>::InferShape() const {
template <typename Dtype, typename T>
void FusionConvAddPReluOp<Dtype, T>::InferShape() const {
auto in_dims = this->param_.Input()->dims();
auto filter_dims = this->param_.Filter()->dims();
const std::vector<int> &strides = this->param_.Strides();
......@@ -42,14 +42,14 @@ namespace paddle_mobile {
}
framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim);
}
}
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_conv_add_prelu,ops::FusionConvAddPReluOp);
REGISTER_OPERATOR_CPU(fusion_conv_add_prelu, ops::FusionConvAddPReluOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
......
......@@ -24,9 +24,9 @@ limitations under the License. */
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
class FusionConvAddPReluOpMatcher : public framework::FusionOpMatcher {
class FusionConvAddPReluOpMatcher : public framework::FusionOpMatcher {
public:
FusionConvAddPReluOpMatcher() {
node_ = framework::Node(G_OP_TYPE_CONV);
......@@ -43,14 +43,14 @@ namespace paddle_mobile {
},
removed_nodes);
}
std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_PRELU; }
};
};
template <typename DeviceType, typename T>
class FusionConvAddPReluOp : public framework::OperatorWithKernel<
template <typename DeviceType, typename T>
class FusionConvAddPReluOp
: public framework::OperatorWithKernel<
DeviceType, FusionConvAddPReluParam<DeviceType>,
operators::ConvAddPReluKernel<DeviceType, T>> {
public:
......@@ -69,13 +69,13 @@ namespace paddle_mobile {
void InferShape() const override;
protected:
};
};
#ifdef PADDLE_MOBILE_CPU
#ifndef CONV_ADD_PRELU_REGISTER
#define CONV_ADD_PRELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar(
static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar(
new FusionConvAddPReluOpMatcher());
#endif
......@@ -84,7 +84,7 @@ namespace paddle_mobile {
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifndef CONV_ADD_PRELU_REGISTER
#ifndef CONV_ADD_PRELU_REGISTER
#define CONV_ADD_PRELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar(
new FusionConvAddPReluOpMatcher());
......@@ -92,7 +92,7 @@ static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar(
#endif
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
#ifdef PADDLE_MOBILE_CPU
......
......@@ -18,21 +18,22 @@ limitations under the License. */
#include "operators/kernel/central-arm-func/conv_add_add_prelu_arm_func.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
template <>
bool ConvAddAddPReluKernel<CPU, float>::Init(FusionConvAddAddPReluParam<CPU> *param) {
template <>
bool ConvAddAddPReluKernel<CPU, float>::Init(
FusionConvAddAddPReluParam<CPU> *param) {
return true;
}
}
template <>
void ConvAddAddPReluKernel<CPU, float>::Compute(
template <>
void ConvAddAddPReluKernel<CPU, float>::Compute(
const FusionConvAddAddPReluParam<CPU> &param) const {
ConvAddAddPReluCompute<float>(param);
}
template class ConvAddAddPReluKernel<CPU, float>;
}
template class ConvAddAddPReluKernel<CPU, float>;
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -18,21 +18,21 @@ limitations under the License. */
#include "operators/kernel/central-arm-func/conv_add_prelu_arm_func.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
template <>
bool ConvAddPReluKernel<CPU, float>::Init(FusionConvAddPReluParam<CPU> *param) {
template <>
bool ConvAddPReluKernel<CPU, float>::Init(FusionConvAddPReluParam<CPU> *param) {
return true;
}
}
template <>
void ConvAddPReluKernel<CPU, float>::Compute(
template <>
void ConvAddPReluKernel<CPU, float>::Compute(
const FusionConvAddPReluParam<CPU> &param) const {
ConvAddPReluCompute<float>(param);
}
template class ConvAddPReluKernel<CPU, float>;
}
template class ConvAddPReluKernel<CPU, float>;
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -23,10 +23,10 @@ limitations under the License. */
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
template <typename P>
void ConvAddAddPReluCompute(const FusionConvAddAddPReluParam<CPU> &param) {
template <typename P>
void ConvAddAddPReluCompute(const FusionConvAddAddPReluParam<CPU> &param) {
const Tensor *input = param.Input();
Tensor filter = *param.Filter();
Tensor bias = *param.Bias();
......@@ -91,7 +91,7 @@ namespace paddle_mobile {
for (int i = 0; i < batch_size; i++) {
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
Tensor bias1_batch = bias1.Slice(i,i+1).Resize(output_matrix_shape);
Tensor bias1_batch = bias1.Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) {
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
......@@ -115,23 +115,26 @@ namespace paddle_mobile {
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
Tensor bias1_slice = bias1_batch.Slice(g * out_step, (g + 1) * out_step);
float *biase_data1 = bias1_slice.data<float>();
// int n = bias1_slice.dims()[0];
// int m = bias1_slice.dims()[1];
// for(int i=0;i<n*m;i++){
// if(biase_data1[i]!=0)
// DLOG<<biase_data1[i]<<",yangfei";
// }
// math::matmul<float>(filter_slice, false, col_matrix, false,
// static_cast<float>(1), &out_slice,
// static_cast<float>(1), true, biase_data);
math::matmulWithPRelu(filter_slice, false, col_matrix, false,
&out_slice, p,mode, biase_data,biase_data1);
}
// int n = bias1_slice.dims()[0];
// int m = bias1_slice.dims()[1];
// for(int i=0;i<n*m;i++){
// if(biase_data1[i]!=0)
// DLOG<<biase_data1[i]<<",yangfei";
// }
// math::matmul<float>(filter_slice, false, col_matrix,
// false,
// static_cast<float>(1),
// &out_slice,
// static_cast<float>(1), true,
// biase_data);
math::matmulWithPRelu(filter_slice, false, col_matrix, false, &out_slice,
p, mode, biase_data, biase_data1);
}
}
}
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -23,15 +23,15 @@ limitations under the License. */
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
template <typename P>
void ConvAddPReluCompute(const FusionConvAddPReluParam<CPU> &param) {
template <typename P>
void ConvAddPReluCompute(const FusionConvAddPReluParam<CPU> &param) {
const Tensor *input = param.Input();
Tensor filter = *param.Filter();
Tensor bias = *param.Bias();
// DLOG<<"yangfei";
// DLOG<<bias.dims();
// DLOG<<"yangfei";
// DLOG<<bias.dims();
int axis = param.Axis();
Tensor *output = param.Output();
float *biase_data = bias.data<float>();
......@@ -112,16 +112,19 @@ namespace paddle_mobile {
// gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
// math::matmul<float>(filter_slice, false, col_matrix, false,
// static_cast<float>(1), &out_slice,
// static_cast<float>(1), true, biase_data);
math::matmulWithPRelu(filter_slice, false, col_matrix, false,
&out_slice, p,mode, biase_data, nullptr);
}
// math::matmul<float>(filter_slice, false, col_matrix,
// false,
// static_cast<float>(1),
// &out_slice,
// static_cast<float>(1), true,
// biase_data);
math::matmulWithPRelu(filter_slice, false, col_matrix, false, &out_slice,
p, mode, biase_data, nullptr);
}
}
}
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -26,20 +26,20 @@ limitations under the License. */
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
using framework::DDim;
using framework::OpKernelBase;
using framework::DDim;
using framework::OpKernelBase;
template <typename DeviceType, typename T>
class ConvAddAddPReluKernel
template <typename DeviceType, typename T>
class ConvAddAddPReluKernel
: public OpKernelBase<DeviceType, FusionConvAddAddPReluParam<DeviceType>> {
public:
void Compute(const FusionConvAddAddPReluParam<DeviceType> &param) const;
bool Init(FusionConvAddAddPReluParam<DeviceType> *param);
};
};
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -26,20 +26,20 @@ limitations under the License. */
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
using framework::DDim;
using framework::OpKernelBase;
using framework::DDim;
using framework::OpKernelBase;
template <typename DeviceType, typename T>
class ConvAddPReluKernel
template <typename DeviceType, typename T>
class ConvAddPReluKernel
: public OpKernelBase<DeviceType, FusionConvAddPReluParam<DeviceType>> {
public:
void Compute(const FusionConvAddPReluParam<DeviceType> &param) const;
bool Init(FusionConvAddPReluParam<DeviceType> *param);
};
};
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -3172,7 +3172,7 @@ void SgemmWithPRelu_omp(int m, int n, int k, const float *A, int lda,
int max_threads = 1;
#endif
int L1 = 16 / max_threads * 1024;
int L1 = 32 * 1024;
KC = k;
if (m > n) {
// 对 A 分块
......
......@@ -110,9 +110,8 @@ void matmulWithPRelu(const framework::Tensor &matrix_a, bool trans_a,
int K = (!trans_a) ? dim_a[1] : dim_a[0];
#ifdef _OPENMP
xsSgemmWithPRelu_omp(M, N, K, matrix_a.data<float>(), K,
matrix_b.data<float>(), N, matrix_out->data<float>(), N,
p, mode, bias, bias1);
SgemmWithPRelu_omp(M, N, K, matrix_a.data<float>(), K, matrix_b.data<float>(),
N, matrix_out->data<float>(), N, p, mode, bias, bias1);
#else
SgemmWithPRelu(M, N, K, matrix_a.data<float>(), K, matrix_b.data<float>(), N,
matrix_out->data<float>(), N, p, mode, bias, bias1);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册