提交 19e65299 编写于 作者: Y yangfei

imp fusion_conv_add_prelu and fusion_conv_add_add_prelu op

上级 29316335
......@@ -18,33 +18,33 @@ limitations under the License. */
#include "operators/math/conv_func.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void FusionConvAddAddPReluOp<Dtype, T>::InferShape() const {
auto in_dims = this->param_.Input()->dims();
auto filter_dims = this->param_.Filter()->dims();
const std::vector<int> &strides = this->param_.Strides();
std::vector<int> paddings = this->param_.Paddings();
int groups = this->param_.Groups();
std::vector<int> dilations = this->param_.Dilations();
PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
dilations.size() == paddings.size() &&
paddings.size() == strides.size()),
"ConvParam is not suitable");
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
paddings[i], strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim);
}
} // namespace operators
namespace operators {
template <typename Dtype, typename T>
void FusionConvAddAddPReluOp<Dtype, T>::InferShape() const {
auto in_dims = this->param_.Input()->dims();
auto filter_dims = this->param_.Filter()->dims();
const std::vector<int> &strides = this->param_.Strides();
std::vector<int> paddings = this->param_.Paddings();
int groups = this->param_.Groups();
std::vector<int> dilations = this->param_.Dilations();
PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
dilations.size() == paddings.size() &&
paddings.size() == strides.size()),
"ConvParam is not suitable");
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
paddings[i], strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
......
......@@ -24,62 +24,64 @@ limitations under the License. */
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
class FusionConvAddAddPReluOpMatcher : public framework::FusionOpMatcher {
public:
FusionConvAddAddPReluOpMatcher() {
node_ = framework::Node(G_OP_TYPE_CONV);
node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD)
> std::make_shared<framework::Node>(G_OP_TYPE_PRELU);
}
void FolderNodes(
framework::Node *node,
std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
node->Folder(node_.Depth(), Type(),
{{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}, {"Out", "addOut"},{"X", "addX"}}},
{G_OP_TYPE_PRELU, {{"Alpha", "Alpha"}}}
},
removed_nodes);
}
std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU; }
std::vector<std::pair<int, std::string>> NeedCheck() {
DLOG << " conv add add prelu check add X ";
return {{2, "Y"}, {2, "X"}};
}
};
template <typename DeviceType, typename T>
class FusionConvAddAddPReluOp : public framework::OperatorWithKernel<
DeviceType, FusionConvAddAddPReluParam<DeviceType>,
operators::ConvAddAddPReluKernel<DeviceType, T>> {
public:
FusionConvAddAddPReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<
DeviceType, FusionConvAddAddPReluParam<DeviceType>,
operators::ConvAddAddPReluKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FusionConvAddAddPReluParam<DeviceType>,
operators::ConvAddAddPReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
};
namespace operators {
class FusionConvAddAddPReluOpMatcher : public framework::FusionOpMatcher {
public:
FusionConvAddAddPReluOpMatcher() {
node_ = framework::Node(G_OP_TYPE_CONV);
node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
std::make_shared<framework::Node>(G_OP_TYPE_PRELU);
}
void FolderNodes(
framework::Node *node,
std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
node->Folder(node_.Depth(), Type(),
{{G_OP_TYPE_ELEMENTWISE_ADD,
{{"Y", "Y"}, {"Out", "addOut"}, {"X", "addX"}}},
{G_OP_TYPE_PRELU, {{"Alpha", "Alpha"}}}},
removed_nodes);
}
std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU; }
std::vector<std::pair<int, std::string>> NeedCheck() {
DLOG << " conv add add prelu check add X ";
return {{2, "Y"}, {2, "X"}};
}
};
template <typename DeviceType, typename T>
class FusionConvAddAddPReluOp
: public framework::OperatorWithKernel<
DeviceType, FusionConvAddAddPReluParam<DeviceType>,
operators::ConvAddAddPReluKernel<DeviceType, T>> {
public:
FusionConvAddAddPReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<
DeviceType, FusionConvAddAddPReluParam<DeviceType>,
operators::ConvAddAddPReluKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FusionConvAddAddPReluParam<DeviceType>,
operators::ConvAddAddPReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
};
#ifdef PADDLE_MOBILE_CPU
#ifndef CONV_ADD_ADD_PRELU_REGISTER
#define CONV_ADD_ADD_PRELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar(
new FusionConvAddAddPReluOpMatcher());
static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar(
new FusionConvAddAddPReluOpMatcher());
#endif
#endif
......@@ -87,7 +89,7 @@ namespace paddle_mobile {
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifndef CONV_ADD_ADD_PRELU_REGISTER
#ifndef CONV_ADD_ADD_PRELU_REGISTER
#define CONV_ADD_ADD_PRELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar(
new FusionConvAddAddPReluOpMatcher());
......@@ -95,7 +97,7 @@ static framework::FusionOpRegistrar fusion_conv_add_add_prelu_registrar(
#endif
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
#ifdef PADDLE_MOBILE_CPU
......
......@@ -18,38 +18,38 @@ limitations under the License. */
#include "operators/math/conv_func.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void FusionConvAddPReluOp<Dtype, T>::InferShape() const {
auto in_dims = this->param_.Input()->dims();
auto filter_dims = this->param_.Filter()->dims();
const std::vector<int> &strides = this->param_.Strides();
std::vector<int> paddings = this->param_.Paddings();
int groups = this->param_.Groups();
std::vector<int> dilations = this->param_.Dilations();
PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
dilations.size() == paddings.size() &&
paddings.size() == strides.size()),
"ConvParam is not suitable");
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
paddings[i], strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim);
}
} // namespace operators
namespace operators {
template <typename Dtype, typename T>
void FusionConvAddPReluOp<Dtype, T>::InferShape() const {
auto in_dims = this->param_.Input()->dims();
auto filter_dims = this->param_.Filter()->dims();
const std::vector<int> &strides = this->param_.Strides();
std::vector<int> paddings = this->param_.Paddings();
int groups = this->param_.Groups();
std::vector<int> dilations = this->param_.Dilations();
PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
dilations.size() == paddings.size() &&
paddings.size() == strides.size()),
"ConvParam is not suitable");
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
output_shape.push_back(
math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
paddings[i], strides[i]));
}
framework::DDim ddim = framework::make_ddim(output_shape);
this->param_.Output()->Resize(ddim);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fusion_conv_add_prelu,ops::FusionConvAddPReluOp);
REGISTER_OPERATOR_CPU(fusion_conv_add_prelu, ops::FusionConvAddPReluOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
......
......@@ -24,59 +24,59 @@ limitations under the License. */
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
class FusionConvAddPReluOpMatcher : public framework::FusionOpMatcher {
public:
FusionConvAddPReluOpMatcher() {
node_ = framework::Node(G_OP_TYPE_CONV);
node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
std::make_shared<framework::Node>(G_OP_TYPE_PRELU);
}
void FolderNodes(
framework::Node *node,
std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
node->Folder(node_.Depth(), Type(),
{{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}},
{G_OP_TYPE_PRELU, {{"Alpha", "Alpha"}}}
},
removed_nodes);
}
std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_PRELU; }
};
template <typename DeviceType, typename T>
class FusionConvAddPReluOp : public framework::OperatorWithKernel<
DeviceType, FusionConvAddPReluParam<DeviceType>,
operators::ConvAddPReluKernel<DeviceType, T>> {
public:
FusionConvAddPReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<
DeviceType, FusionConvAddPReluParam<DeviceType>,
operators::ConvAddPReluKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FusionConvAddPReluParam<DeviceType>,
operators::ConvAddPReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
};
namespace operators {
class FusionConvAddPReluOpMatcher : public framework::FusionOpMatcher {
public:
FusionConvAddPReluOpMatcher() {
node_ = framework::Node(G_OP_TYPE_CONV);
node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) >
std::make_shared<framework::Node>(G_OP_TYPE_PRELU);
}
void FolderNodes(
framework::Node *node,
std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
node->Folder(node_.Depth(), Type(),
{{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}},
{G_OP_TYPE_PRELU, {{"Alpha", "Alpha"}}}
},
removed_nodes);
}
std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_PRELU; }
};
template <typename DeviceType, typename T>
class FusionConvAddPReluOp
: public framework::OperatorWithKernel<
DeviceType, FusionConvAddPReluParam<DeviceType>,
operators::ConvAddPReluKernel<DeviceType, T>> {
public:
FusionConvAddPReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<
DeviceType, FusionConvAddPReluParam<DeviceType>,
operators::ConvAddPReluKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FusionConvAddPReluParam<DeviceType>,
operators::ConvAddPReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
};
#ifdef PADDLE_MOBILE_CPU
#ifndef CONV_ADD_PRELU_REGISTER
#define CONV_ADD_PRELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar(
new FusionConvAddPReluOpMatcher());
static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar(
new FusionConvAddPReluOpMatcher());
#endif
#endif
......@@ -84,7 +84,7 @@ namespace paddle_mobile {
#endif
#ifdef PADDLE_MOBILE_FPGA
#ifndef CONV_ADD_PRELU_REGISTER
#ifndef CONV_ADD_PRELU_REGISTER
#define CONV_ADD_PRELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar(
new FusionConvAddPReluOpMatcher());
......@@ -92,7 +92,7 @@ static framework::FusionOpRegistrar fusion_conv_add_prelu_registrar(
#endif
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
#ifdef PADDLE_MOBILE_CPU
......
......@@ -18,21 +18,22 @@ limitations under the License. */
#include "operators/kernel/central-arm-func/conv_add_add_prelu_arm_func.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvAddAddPReluKernel<CPU, float>::Init(FusionConvAddAddPReluParam<CPU> *param) {
return true;
}
template <>
void ConvAddAddPReluKernel<CPU, float>::Compute(
const FusionConvAddAddPReluParam<CPU> &param) const {
ConvAddAddPReluCompute<float>(param);
}
template class ConvAddAddPReluKernel<CPU, float>;
} // namespace operators
namespace operators {
template <>
bool ConvAddAddPReluKernel<CPU, float>::Init(
FusionConvAddAddPReluParam<CPU> *param) {
return true;
}
template <>
void ConvAddAddPReluKernel<CPU, float>::Compute(
const FusionConvAddAddPReluParam<CPU> &param) const {
ConvAddAddPReluCompute<float>(param);
}
template class ConvAddAddPReluKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -18,21 +18,21 @@ limitations under the License. */
#include "operators/kernel/central-arm-func/conv_add_prelu_arm_func.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
template <>
bool ConvAddPReluKernel<CPU, float>::Init(FusionConvAddPReluParam<CPU> *param) {
return true;
}
template <>
bool ConvAddPReluKernel<CPU, float>::Init(FusionConvAddPReluParam<CPU> *param) {
return true;
}
template <>
void ConvAddPReluKernel<CPU, float>::Compute(
const FusionConvAddPReluParam<CPU> &param) const {
ConvAddPReluCompute<float>(param);
}
template class ConvAddPReluKernel<CPU, float>;
template <>
void ConvAddPReluKernel<CPU, float>::Compute(
const FusionConvAddPReluParam<CPU> &param) const {
ConvAddPReluCompute<float>(param);
}
template class ConvAddPReluKernel<CPU, float>;
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -23,115 +23,118 @@ limitations under the License. */
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename P>
void ConvAddAddPReluCompute(const FusionConvAddAddPReluParam<CPU> &param) {
const Tensor *input = param.Input();
Tensor filter = *param.Filter();
Tensor bias = *param.Bias();
Tensor bias1 = *param.Bias1();
int axis = param.Axis();
Tensor *output = param.Output();
float *biase_data = bias.data<float>();
int groups = param.Groups();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
std::vector<int> dilations = param.Dilations();
Tensor aa = *param.InputAlpha();
float *p = aa.data<float>();
std::string mode = param.Mode();
const int batch_size = static_cast<int>(input->dims()[0]);
std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
size_t data_dim = filter_shape_vec.size() - 2;
std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
col_shape_vec[0] = input->dims()[1] / groups;
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 1] = filter_shape_vec[j + 2];
col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
}
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, data_dim + 1);
bool is_expand =
math::IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col;
Tensor col_matrix;
if (is_expand) {
col.mutable_data<float>(col_shape);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
framework::DDim input_shape = framework::slice_ddim(
input->dims(), 1, static_cast<int>(input->dims().size()));
framework::DDim filter_matrix_shape = {filter.dims()[0],
filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape);
framework::DDim output_matrix_shape = {
output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])};
// convolution operator: im2col(or vol2col) + gemm
int in_step = static_cast<int>(input->dims()[1]) / groups;
int out_step = static_cast<int>(output->dims()[1]) / groups;
math::Vol2ColFunctor<CPU, float> vol2col;
math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
for (int i = 0; i < batch_size; i++) {
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
Tensor bias1_batch = bias1.Slice(i,i+1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) {
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col.ShareDataWith(in_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
// im2col
im2col(in_slice, dilations, strides,
std::vector<int>{paddings[0], paddings[1], paddings[0],
paddings[1]},
&col);
} else if (data_dim == 3U) {
// vol2col
vol2col(in_slice, dilations, strides, paddings, &col);
}
// gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
Tensor bias1_slice = bias1_batch.Slice(g * out_step, (g + 1) * out_step);
float *biase_data1 = bias1_slice.data<float>();
// int n = bias1_slice.dims()[0];
// int m = bias1_slice.dims()[1];
// for(int i=0;i<n*m;i++){
// if(biase_data1[i]!=0)
// DLOG<<biase_data1[i]<<",yangfei";
// }
// math::matmul<float>(filter_slice, false, col_matrix, false,
// static_cast<float>(1), &out_slice,
// static_cast<float>(1), true, biase_data);
math::matmulWithPRelu(filter_slice, false, col_matrix, false,
&out_slice, p,mode, biase_data,biase_data1);
}
}
}
} // namespace operators
namespace operators {
template <typename P>
void ConvAddAddPReluCompute(const FusionConvAddAddPReluParam<CPU> &param) {
const Tensor *input = param.Input();
Tensor filter = *param.Filter();
Tensor bias = *param.Bias();
Tensor bias1 = *param.Bias1();
int axis = param.Axis();
Tensor *output = param.Output();
float *biase_data = bias.data<float>();
int groups = param.Groups();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
std::vector<int> dilations = param.Dilations();
Tensor aa = *param.InputAlpha();
float *p = aa.data<float>();
std::string mode = param.Mode();
const int batch_size = static_cast<int>(input->dims()[0]);
std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
size_t data_dim = filter_shape_vec.size() - 2;
std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
col_shape_vec[0] = input->dims()[1] / groups;
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 1] = filter_shape_vec[j + 2];
col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
}
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, data_dim + 1);
bool is_expand =
math::IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col;
Tensor col_matrix;
if (is_expand) {
col.mutable_data<float>(col_shape);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
framework::DDim input_shape = framework::slice_ddim(
input->dims(), 1, static_cast<int>(input->dims().size()));
framework::DDim filter_matrix_shape = {filter.dims()[0],
filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape);
framework::DDim output_matrix_shape = {
output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])};
// convolution operator: im2col(or vol2col) + gemm
int in_step = static_cast<int>(input->dims()[1]) / groups;
int out_step = static_cast<int>(output->dims()[1]) / groups;
math::Vol2ColFunctor<CPU, float> vol2col;
math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
for (int i = 0; i < batch_size; i++) {
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
Tensor bias1_batch = bias1.Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) {
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col.ShareDataWith(in_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
// im2col
im2col(in_slice, dilations, strides,
std::vector<int>{paddings[0], paddings[1], paddings[0],
paddings[1]},
&col);
} else if (data_dim == 3U) {
// vol2col
vol2col(in_slice, dilations, strides, paddings, &col);
}
// gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
Tensor bias1_slice = bias1_batch.Slice(g * out_step, (g + 1) * out_step);
float *biase_data1 = bias1_slice.data<float>();
// int n = bias1_slice.dims()[0];
// int m = bias1_slice.dims()[1];
// for(int i=0;i<n*m;i++){
// if(biase_data1[i]!=0)
// DLOG<<biase_data1[i]<<",yangfei";
// }
// math::matmul<float>(filter_slice, false, col_matrix,
// false,
// static_cast<float>(1),
// &out_slice,
// static_cast<float>(1), true,
// biase_data);
math::matmulWithPRelu(filter_slice, false, col_matrix, false, &out_slice,
p, mode, biase_data, biase_data1);
}
}
}
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -23,105 +23,108 @@ limitations under the License. */
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename P>
void ConvAddPReluCompute(const FusionConvAddPReluParam<CPU> &param) {
const Tensor *input = param.Input();
Tensor filter = *param.Filter();
Tensor bias = *param.Bias();
// DLOG<<"yangfei";
// DLOG<<bias.dims();
int axis = param.Axis();
Tensor *output = param.Output();
float *biase_data = bias.data<float>();
int groups = param.Groups();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
std::vector<int> dilations = param.Dilations();
Tensor aa = *param.InputAlpha();
float *p = aa.data<float>();
std::string mode = param.Mode();
const int batch_size = static_cast<int>(input->dims()[0]);
std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
size_t data_dim = filter_shape_vec.size() - 2;
std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
col_shape_vec[0] = input->dims()[1] / groups;
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 1] = filter_shape_vec[j + 2];
col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
}
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, data_dim + 1);
bool is_expand =
math::IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col;
Tensor col_matrix;
if (is_expand) {
col.mutable_data<float>(col_shape);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
framework::DDim input_shape = framework::slice_ddim(
input->dims(), 1, static_cast<int>(input->dims().size()));
framework::DDim filter_matrix_shape = {filter.dims()[0],
filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape);
framework::DDim output_matrix_shape = {
output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])};
// convolution operator: im2col(or vol2col) + gemm
int in_step = static_cast<int>(input->dims()[1]) / groups;
int out_step = static_cast<int>(output->dims()[1]) / groups;
math::Vol2ColFunctor<CPU, float> vol2col;
math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
for (int i = 0; i < batch_size; i++) {
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) {
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col.ShareDataWith(in_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
// im2col
im2col(in_slice, dilations, strides,
std::vector<int>{paddings[0], paddings[1], paddings[0],
paddings[1]},
&col);
} else if (data_dim == 3U) {
// vol2col
vol2col(in_slice, dilations, strides, paddings, &col);
}
// gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
// math::matmul<float>(filter_slice, false, col_matrix, false,
// static_cast<float>(1), &out_slice,
// static_cast<float>(1), true, biase_data);
math::matmulWithPRelu(filter_slice, false, col_matrix, false,
&out_slice, p,mode, biase_data, nullptr);
}
}
}
} // namespace operators
namespace operators {
template <typename P>
void ConvAddPReluCompute(const FusionConvAddPReluParam<CPU> &param) {
const Tensor *input = param.Input();
Tensor filter = *param.Filter();
Tensor bias = *param.Bias();
// DLOG<<"yangfei";
// DLOG<<bias.dims();
int axis = param.Axis();
Tensor *output = param.Output();
float *biase_data = bias.data<float>();
int groups = param.Groups();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
std::vector<int> dilations = param.Dilations();
Tensor aa = *param.InputAlpha();
float *p = aa.data<float>();
std::string mode = param.Mode();
const int batch_size = static_cast<int>(input->dims()[0]);
std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
size_t data_dim = filter_shape_vec.size() - 2;
std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
col_shape_vec[0] = input->dims()[1] / groups;
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 1] = filter_shape_vec[j + 2];
col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
}
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, data_dim + 1);
bool is_expand =
math::IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col;
Tensor col_matrix;
if (is_expand) {
col.mutable_data<float>(col_shape);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
framework::DDim input_shape = framework::slice_ddim(
input->dims(), 1, static_cast<int>(input->dims().size()));
framework::DDim filter_matrix_shape = {filter.dims()[0],
filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape);
framework::DDim output_matrix_shape = {
output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])};
// convolution operator: im2col(or vol2col) + gemm
int in_step = static_cast<int>(input->dims()[1]) / groups;
int out_step = static_cast<int>(output->dims()[1]) / groups;
math::Vol2ColFunctor<CPU, float> vol2col;
math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
for (int i = 0; i < batch_size; i++) {
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) {
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col.ShareDataWith(in_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
// im2col
im2col(in_slice, dilations, strides,
std::vector<int>{paddings[0], paddings[1], paddings[0],
paddings[1]},
&col);
} else if (data_dim == 3U) {
// vol2col
vol2col(in_slice, dilations, strides, paddings, &col);
}
// gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
// math::matmul<float>(filter_slice, false, col_matrix,
// false,
// static_cast<float>(1),
// &out_slice,
// static_cast<float>(1), true,
// biase_data);
math::matmulWithPRelu(filter_slice, false, col_matrix, false, &out_slice,
p, mode, biase_data, nullptr);
}
}
}
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -26,20 +26,20 @@ limitations under the License. */
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
using framework::DDim;
using framework::OpKernelBase;
using framework::DDim;
using framework::OpKernelBase;
template <typename DeviceType, typename T>
class ConvAddAddPReluKernel
: public OpKernelBase<DeviceType, FusionConvAddAddPReluParam<DeviceType>> {
public:
void Compute(const FusionConvAddAddPReluParam<DeviceType> &param) const;
bool Init(FusionConvAddAddPReluParam<DeviceType> *param);
};
template <typename DeviceType, typename T>
class ConvAddAddPReluKernel
: public OpKernelBase<DeviceType, FusionConvAddAddPReluParam<DeviceType>> {
public:
void Compute(const FusionConvAddAddPReluParam<DeviceType> &param) const;
bool Init(FusionConvAddAddPReluParam<DeviceType> *param);
};
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -26,20 +26,20 @@ limitations under the License. */
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
using framework::DDim;
using framework::OpKernelBase;
using framework::DDim;
using framework::OpKernelBase;
template <typename DeviceType, typename T>
class ConvAddPReluKernel
: public OpKernelBase<DeviceType, FusionConvAddPReluParam<DeviceType>> {
public:
void Compute(const FusionConvAddPReluParam<DeviceType> &param) const;
bool Init(FusionConvAddPReluParam<DeviceType> *param);
};
template <typename DeviceType, typename T>
class ConvAddPReluKernel
: public OpKernelBase<DeviceType, FusionConvAddPReluParam<DeviceType>> {
public:
void Compute(const FusionConvAddPReluParam<DeviceType> &param) const;
bool Init(FusionConvAddPReluParam<DeviceType> *param);
};
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -3172,7 +3172,7 @@ void SgemmWithPRelu_omp(int m, int n, int k, const float *A, int lda,
int max_threads = 1;
#endif
int L1 = 16 / max_threads * 1024;
int L1 = 32 * 1024;
KC = k;
if (m > n) {
// 对 A 分块
......
......@@ -110,9 +110,8 @@ void matmulWithPRelu(const framework::Tensor &matrix_a, bool trans_a,
int K = (!trans_a) ? dim_a[1] : dim_a[0];
#ifdef _OPENMP
xsSgemmWithPRelu_omp(M, N, K, matrix_a.data<float>(), K,
matrix_b.data<float>(), N, matrix_out->data<float>(), N,
p, mode, bias, bias1);
SgemmWithPRelu_omp(M, N, K, matrix_a.data<float>(), K, matrix_b.data<float>(),
N, matrix_out->data<float>(), N, p, mode, bias, bias1);
#else
SgemmWithPRelu(M, N, K, matrix_a.data<float>(), K, matrix_b.data<float>(), N,
matrix_out->data<float>(), N, p, mode, bias, bias1);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册